# =======================================================
# Problem 1: CryptoCurrencies & R MarkDown
# =======================================================

# Load the raw market table from the working directory.
all_crypto <- read.csv(file = "crypto-markets.csv")

# Show the column names and types of the loaded table
str(object = all_crypto)
## 'data.frame':    748363 obs. of  13 variables:
##  $ slug       : Factor w/ 1586 levels "0x","1337coin",..: 151 151 151 151 151 151 151 151 151 151 ...
##  $ symbol     : Factor w/ 1553 levels "$$$","$PAC","1337",..: 211 211 211 211 211 211 211 211 211 211 ...
##  $ name       : Factor w/ 1584 levels "0x","1World",..: 149 149 149 149 149 149 149 149 149 149 ...
##  $ date       : Factor w/ 1793 levels "2013-04-28","2013-04-29",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ ranknow    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ open       : num  135 134 144 139 116 ...
##  $ high       : num  136 147 147 140 126 ...
##  $ low        : num  132.1 134 134.1 107.7 92.3 ...
##  $ close      : num  134 145 139 117 105 ...
##  $ volume     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ market     : num  1.50e+09 1.49e+09 1.60e+09 1.54e+09 1.29e+09 ...
##  $ close_ratio: num  0.544 0.781 0.384 0.288 0.388 ...
##  $ spread     : num  3.88 13.49 12.88 32.17 33.32 ...
# ==============================================================
# Pre-processing
# ==============================================================

# Get the cryptocurrencies that we want
# ==============================================================
# BTC, ETH, XRP, BCH, LTC
# ==============================================================

# One subset of the full table per coin, selected by ticker symbol.
# NOTE(review): the str() output above lists fewer symbol levels (1553) than
# slug levels (1586), so some tickers are shared by several coins -- confirm
# that these five symbols are unique, otherwise filter on slug instead.
btc <- all_crypto[all_crypto[["symbol"]] == "BTC", ]
eth <- all_crypto[all_crypto[["symbol"]] == "ETH", ]
xrp <- all_crypto[all_crypto[["symbol"]] == "XRP", ]
bch <- all_crypto[all_crypto[["symbol"]] == "BCH", ]
ltc <- all_crypto[all_crypto[["symbol"]] == "LTC", ]

# Filter the columns:
# ===============================================================
# Column we need : symbol, date, open, close 
# ===============================================================

# Build each trimmed frame with its final column names in a single step
# (symbol, date, open, close), one frame per coin.
btc_final <- data.frame(
  symbol = btc$symbol,
  date = btc$date,
  open = btc$open,
  close = btc$close
)

eth_final <- data.frame(
  symbol = eth$symbol,
  date = eth$date,
  open = eth$open,
  close = eth$close
)

xrp_final <- data.frame(
  symbol = xrp$symbol,
  date = xrp$date,
  open = xrp$open,
  close = xrp$close
)

bch_final <- data.frame(
  symbol = bch$symbol,
  date = bch$date,
  open = bch$open,
  close = bch$close
)

ltc_final <- data.frame(
  symbol = ltc$symbol,
  date = ltc$date,
  open = ltc$open,
  close = ltc$close
)

# Format the date & symbol
# ================================================================
# Current implementation is Factor
# ================================================================
# Convert the date column to Date (via its text form) and the symbol column
# to plain character strings, for every coin.
btc_final$date <- as.Date(as.character(btc_final$date))
btc_final$symbol <- as.character(btc_final$symbol)

eth_final$date <- as.Date(as.character(eth_final$date))
eth_final$symbol <- as.character(eth_final$symbol)

xrp_final$date <- as.Date(as.character(xrp_final$date))
xrp_final$symbol <- as.character(xrp_final$symbol)

bch_final$date <- as.Date(as.character(bch_final$date))
bch_final$symbol <- as.character(bch_final$symbol)

ltc_final$date <- as.Date(as.character(ltc_final$date))
ltc_final$symbol <- as.character(ltc_final$symbol)

# ==============================================================
# Filtering to the date range 2017-04-01 .. 2018-04-15 (rows are not re-ordered)
# ==============================================================

# Keep only rows dated 2017-04-01 through 2018-04-15 (both ends inclusive).
btc_sorted_final <- btc_final[
  btc_final$date >= as.Date("2017-04-01") &
    btc_final$date <= as.Date("2018-04-15"),
]
eth_sorted_final <- eth_final[
  eth_final$date >= as.Date("2017-04-01") &
    eth_final$date <= as.Date("2018-04-15"),
]
xrp_sorted_final <- xrp_final[
  xrp_final$date >= as.Date("2017-04-01") &
    xrp_final$date <= as.Date("2018-04-15"),
]
bch_sorted_final <- bch_final[
  bch_final$date >= as.Date("2017-04-01") &
    bch_final$date <= as.Date("2018-04-15"),
]
ltc_sorted_final <- ltc_final[
  ltc_final$date >= as.Date("2017-04-01") &
    ltc_final$date <= as.Date("2018-04-15"),
]

# ===============================================================
# Data Preprocessing Done! 
# ===============================================================

library("ggplot2")
library("tseries")
library("forecast")

# ================================================================
# BITCOIN!
# ================================================================

# plot open price
# The legend entry names the series that is actually drawn (open, not close).
ggplot() +
  geom_line(data = btc_sorted_final, aes(x = date, y = open, color = "Open Price"))

# Five-number summary plus mean of the open price over the filtered window
summary(btc_sorted_final$open)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1072    2564    4389    6318    9384   19476
# plot closed price 

# Line chart of the daily close price over the filtered window
ggplot(data = btc_sorted_final, mapping = aes(x = date, y = close)) +
  geom_line(mapping = aes(color = "Close Price"))

# Five-number summary plus mean of the close price
summary(object = btc_sorted_final$close)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1080    2573    4404    6338    9366   19497
# In December 2017 Bitcoin prices surged at a ridiculous rate.

# This affects the whole time series, as the prices before that huge spike
# appear to follow a model of their own. Also, the prices before, let's say, 6000
# are of little use now.

# Restrict the modelling window to 2017-04-15 .. 2018-04-15, then attach
# 7-day and 30-day moving averages of the open and close prices as columns.
btc_latest_model <- btc_sorted_final[
  btc_sorted_final$date >= as.Date("2017-04-15") &
    btc_sorted_final$date <= as.Date("2018-04-15"),
]
btc_latest_model$open_ma <- ma(btc_latest_model$open, order = 7) # Weekly Moving Average
btc_latest_model$open_ma30 <- ma(btc_latest_model$open, order = 30) # Monthly Moving Average

btc_latest_model$close_ma <- ma(btc_latest_model$close, order = 7) # Weekly Moving Average
btc_latest_model$close_ma30 <- ma(btc_latest_model$close, order = 30) # Monthly Moving Average

# Open price with its weekly and monthly moving averages overlaid
ggplot(data = btc_latest_model, mapping = aes(x = date)) +
  geom_line(mapping = aes(y = open, color = "open prices")) +
  geom_line(mapping = aes(y = open_ma, color = "weekly ma")) +
  geom_line(mapping = aes(y = open_ma30, color = "monthly ma"))
## Warning: Removed 6 rows containing missing values (geom_path).
## Warning: Removed 30 rows containing missing values (geom_path).

# Close price with its weekly and monthly moving averages overlaid.
# The raw series is `close` so that it matches the close-based averages
# drawn on top of it.
ggplot() +
  geom_line(data = btc_latest_model, aes(x = date, y = close, color = "close prices")) +
  geom_line(data = btc_latest_model, aes(x = date, y = close_ma, color = "weekly ma")) +
  geom_line(data = btc_latest_model, aes(x = date, y = close_ma30, color = "monthly ma"))
## Warning: Removed 6 rows containing missing values (geom_path).

## Warning: Removed 30 rows containing missing values (geom_path).

# ============================================================================
# Predict Bitcoin Open Price  
# ============================================================================

# Weekly-smoothed open series with the NA rows of the moving average dropped
btc_open_ma <- ts(data = na.omit(btc_latest_model$open_ma))

# Stationarity
adf.test(x = btc_open_ma, alternative = "stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  btc_open_ma
## Dickey-Fuller = -1.4587, Lag order = 6, p-value = 0.805
## alternative hypothesis: stationary
# Stop if p-value < 0.05

# Log-transform the smoothed open series, plot it, and repeat the ADF test.
btcLog <- log(btc_open_ma)
plot(
  btcLog,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(x = btcLog)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  btcLog
## Dickey-Fuller = -1.1303, Lag order = 6, p-value = 0.9166
## alternative hypothesis: stationary
# Stop if p-value < 0.05

# First-difference the log series, plot it, and repeat the ADF test.
btcLogDiff <- diff(btcLog)
plot(
  btcLogDiff,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(x = btcLogDiff)
## Warning in adf.test(btcLogDiff): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  btcLogDiff
## Dickey-Fuller = -6.6834, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# Stop if p-value < 0.05

# ==========================================================
# ACF and PACF 
# ==========================================================

# Autocorrelation of the differenced log series
acf(x = btcLogDiff, main = "ACF For BTC Differenced Series")

# q is 5

# Partial autocorrelation of the same series
pacf(x = btcLogDiff, main = "PACF For BTC Differenced Series")

# p is 4

# Arima Based on guessing
# d stays 0 because btcLogDiff has already been differenced once.
arimaFit <- arima(x = btcLogDiff, order = c(4, 0, 5))
arimaFit            # check the coefficients
## 
## Call:
## arima(x = btcLogDiff, order = c(4, 0, 5))
## 
## Coefficients:
##           ar1     ar2     ar3     ar4     ma1     ma2     ma3      ma4
##       -0.6083  0.1646  0.5395  0.2697  1.7730  1.6395  0.8007  -0.0837
## s.e.   0.1116  0.0966  0.0636  0.0657  0.1037  0.2138  0.2461   0.1887
##           ma5  intercept
##       -0.4041     0.0059
## s.e.   0.0884     0.0037
## 
## sigma^2 estimated as 8.54e-05:  log likelihood = 1100.35,  aic = -2178.69
# Plot the fitted model object
plot(arimaFit)

# Apply the fitted model to the series and forecast 10 steps ahead
arimaFitFC <- forecast(btcLogDiff, h = 10, model = arimaFit)

# Observed series (light blue) with the fitted values (red) drawn on top
plot(
  btcLogDiff,
  type = "l", col = "lightblue", lwd = 3,
  xlab = "", ylab = ""
)
lines(arimaFitFC$fitted, col = "red", lwd = 2)

# Forecast beyond the end of the series
plot(arimaFitFC)

# Residual series together with its ACF and PACF, up to lag 80
tsdisplay(residuals(arimaFit), lag.max = 80)

# Find the "optimal" ARIMA parameters

# Stepwise AIC search over ARMA orders (p, q up to 5) for the differenced
# log open series. The selected model is printed, not stored.
# NOTE(review): with seasonal = FALSE the seasonal limits (max.P, max.Q,
# max.D, start.P, start.Q) should have no effect -- confirm against the
# auto.arima documentation.
auto.arima(btcLogDiff,                   # the dataset
           seasonal = FALSE,            # seasonality
           stationary = TRUE,           # stationarity
           max.p = 5, max.q = 5,        # range of p,q (non-seasonal)
           max.P = 5, max.Q = 5,        # range of P,Q (seasonal)
           max.d = 2, max.D = 2,        # range of d,D (differences)
           start.p = 1, start.q = 1,    # start for stepwise search
           start.P = 1, start.Q = 1,    # start for stepwise search 
           ic = "aic",                  # criteria to compare
           stepwise = TRUE,             # stepwise selection (faster)
           trace = TRUE)                # all ARIMA models reported
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(1,0,1) with non-zero mean : -2091.709
##  ARIMA(0,0,0) with non-zero mean : -1634.916
##  ARIMA(1,0,0) with non-zero mean : -2090.902
##  ARIMA(0,0,1) with non-zero mean : -1889.013
##  ARIMA(0,0,0) with zero mean     : -1612.355
##  ARIMA(2,0,1) with non-zero mean : -2088.604
##  ARIMA(1,0,2) with non-zero mean : -2090.058
##  ARIMA(2,0,2) with non-zero mean : -2086.859
##  ARIMA(1,0,1) with zero mean     : -2091.609
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(1,0,1) with non-zero mean : -2091.342
## 
##  Best model: ARIMA(1,0,1) with non-zero mean
## Series: btcLogDiff 
## ARIMA(1,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1     ma1    mean
##       0.8335  0.1019  0.0059
## s.e.  0.0341  0.0614  0.0038
## 
## sigma^2 estimated as 0.0001181:  log likelihood=1049.67
## AIC=-2091.34   AICc=-2091.22   BIC=-2076.05
# Fit the "optimal" ARIMA(p,d,q) model
# Refit the order chosen by the search above: ARIMA(1,0,1)
arimaOpt <- arima(x = btcLogDiff, order = c(1, 0, 1))
arimaOpt
## 
## Call:
## arima(x = btcLogDiff, order = c(1, 0, 1))
## 
## Coefficients:
##          ar1     ma1  intercept
##       0.8335  0.1019     0.0059
## s.e.  0.0341  0.0614     0.0038
## 
## sigma^2 estimated as 0.000117:  log likelihood = 1049.67,  aic = -2091.34
# Plot the fitted model object
plot(arimaOpt)

# Forecast using optimal ARIMA
# Within the training dataset
arimaOptFC <- forecast(btcLogDiff, h = 10, model = arimaOpt)
plot(
  btcLogDiff,
  type = "l", col = "lightblue", lwd = 3,
  xlab = "", ylab = ""
)
lines(arimaOptFC$fitted, col = "red", lwd = 2)

# Beyond the training dataset
plot(arimaOptFC)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
# Re-plot the differenced log open series, then repeat the order search with
# seasonal models allowed.
# NOTE(review): btcLogDiff derives from ts() called without a frequency, so
# it presumably has frequency 1; auto.arima may then never consider seasonal
# terms, which would explain why the trace below matches the non-seasonal
# run line for line. Confirm (e.g. with frequency = 7) before concluding the
# series is not seasonal.
plot(btcLogDiff, type="l", col = "blue", 
     lwd = 3, xlab = "", ylab = "")

auto.arima(btcLogDiff,                   # the dataset
           seasonal = TRUE,             # seasonality
           stationary = TRUE,           # stationarity
           max.p = 5, max.q = 5,        # range of p,q (non-seasonal)
           max.P = 5, max.Q = 5,        # range of P,Q (seasonal)
           max.d = 2, max.D = 2,        # range of d,D (differences)
           start.p = 1, start.q = 1,    # start for stepwise search
           start.P = 1, start.Q = 1,    # start for stepwise search 
           ic = "aic",                  # criteria to compare
           stepwise = TRUE,             # stepwise selection (faster)
           trace = TRUE)                # all ARIMA models reported
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(1,0,1) with non-zero mean : -2091.709
##  ARIMA(0,0,0) with non-zero mean : -1634.916
##  ARIMA(1,0,0) with non-zero mean : -2090.902
##  ARIMA(0,0,1) with non-zero mean : -1889.013
##  ARIMA(0,0,0) with zero mean     : -1612.355
##  ARIMA(2,0,1) with non-zero mean : -2088.604
##  ARIMA(1,0,2) with non-zero mean : -2090.058
##  ARIMA(2,0,2) with non-zero mean : -2086.859
##  ARIMA(1,0,1) with zero mean     : -2091.609
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(1,0,1) with non-zero mean : -2091.342
## 
##  Best model: ARIMA(1,0,1) with non-zero mean
## Series: btcLogDiff 
## ARIMA(1,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1     ma1    mean
##       0.8335  0.1019  0.0059
## s.e.  0.0341  0.0614  0.0038
## 
## sigma^2 estimated as 0.0001181:  log likelihood=1049.67
## AIC=-2091.34   AICc=-2091.22   BIC=-2076.05
# Not SEASONAL! :D

# Check the performance/accuracy:
# residuals of the selected model with their ACF and PACF up to lag 80
tsdisplay(
  residuals(arimaOpt),
  lag.max = 80
)

# ============================================================================
# Predict Bitcoin Close Price  
# ============================================================================

# Weekly-smoothed close series with the NA rows of the moving average dropped
btc_close_ma <- ts(data = na.omit(btc_latest_model$close_ma))

# Stationarity
adf.test(x = btc_close_ma, alternative = "stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  btc_close_ma
## Dickey-Fuller = -1.4453, Lag order = 6, p-value = 0.8107
## alternative hypothesis: stationary
# Stop if p-value < 0.05

# Log-transform the smoothed close series, plot it, and repeat the ADF test.
btcCloseLog <- log(btc_close_ma)
plot(
  btcCloseLog,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(x = btcCloseLog)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  btcCloseLog
## Dickey-Fuller = -1.0811, Lag order = 6, p-value = 0.9246
## alternative hypothesis: stationary
# Stop if p-value < 0.05

# First-difference the log close series, plot it, and repeat the ADF test.
btcCloseLogDiff <- diff(btcCloseLog)
plot(
  btcCloseLogDiff,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(x = btcCloseLogDiff)
## Warning in adf.test(btcCloseLogDiff): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  btcCloseLogDiff
## Dickey-Fuller = -6.6921, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# Stop if p-value < 0.05

# ==========================================================
# ACF and PACF 
# ==========================================================

# Autocorrelation of the differenced log close series
acf(x = btcCloseLogDiff, main = "ACF For BTC Differenced Series")

# q is 2

# Partial autocorrelation of the same series
pacf(x = btcCloseLogDiff, main = "PACF For BTC Differenced Series")

# p is 4

# Arima Based on guessing
# This overwrites the arimaFit object created for the open-price series.
arimaFit <- arima(x = btcCloseLogDiff, order = c(4, 0, 2))
arimaFit            # check the coefficients
## 
## Call:
## arima(x = btcCloseLogDiff, order = c(4, 0, 2))
## 
## Coefficients:
##          ar1      ar2     ar3      ar4     ma1     ma2  intercept
##       0.4776  -0.3805  0.6350  -0.1131  0.4702  0.9664     0.0059
## s.e.  0.0572   0.0570  0.0578   0.0557  0.0203  0.0230     0.0035
## 
## sigma^2 estimated as 0.0001037:  log likelihood = 1068.85,  aic = -2121.71
# Plot the fitted model object
plot(arimaFit)

# Apply the fitted model to the close series and forecast 10 steps ahead
arimaFitFC <- forecast(btcCloseLogDiff, h = 10, model = arimaFit)

# Observed series (light blue) with the fitted values (red) drawn on top
plot(
  btcCloseLogDiff,
  type = "l", col = "lightblue", lwd = 3,
  xlab = "", ylab = ""
)
lines(arimaFitFC$fitted, col = "red", lwd = 2)

# Forecast beyond the end of the series
plot(arimaFitFC)

# Residual series together with its ACF and PACF, up to lag 80
tsdisplay(residuals(arimaFit), lag.max = 80)

# Find the "optimal" ARIMA parameters

# Stepwise AIC search over ARMA orders (p, q up to 5) for the differenced
# log close series. The selected model is printed, not stored.
# NOTE(review): with seasonal = FALSE the seasonal limits (max.P, max.Q,
# max.D, start.P, start.Q) should have no effect -- confirm against the
# auto.arima documentation.
auto.arima(btcCloseLogDiff,                   # the dataset
           seasonal = FALSE,            # seasonality
           stationary = TRUE,           # stationarity
           max.p = 5, max.q = 5,        # range of p,q (non-seasonal)
           max.P = 5, max.Q = 5,        # range of P,Q (seasonal)
           max.d = 2, max.D = 2,        # range of d,D (differences)
           start.p = 1, start.q = 1,    # start for stepwise search
           start.P = 1, start.Q = 1,    # start for stepwise search 
           ic = "aic",                  # criteria to compare
           stepwise = TRUE,             # stepwise selection (faster)
           trace = TRUE)                # all ARIMA models reported
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(1,0,1) with non-zero mean : -2085.794
##  ARIMA(0,0,0) with non-zero mean : -1633.821
##  ARIMA(1,0,0) with non-zero mean : -2085.464
##  ARIMA(0,0,1) with non-zero mean : -1886.411
##  ARIMA(0,0,0) with zero mean     : -1611.426
##  ARIMA(2,0,1) with non-zero mean : -2083.142
##  ARIMA(1,0,2) with non-zero mean : -2084.015
##  ARIMA(2,0,2) with non-zero mean : -2081.276
##  ARIMA(1,0,1) with zero mean     : -2085.801
##  ARIMA(0,0,1) with zero mean     : -1870.209
##  ARIMA(2,0,1) with zero mean     : -2083.185
##  ARIMA(1,0,0) with zero mean     : -2085.731
##  ARIMA(1,0,2) with zero mean     : -2083.954
##  ARIMA(2,0,2) with zero mean     : -2081.274
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(1,0,1) with zero mean     : -2085.278
## 
##  Best model: ARIMA(1,0,1) with zero mean
## Series: btcCloseLogDiff 
## ARIMA(1,0,1) with zero mean 
## 
## Coefficients:
##          ar1     ma1
##       0.8461  0.0876
## s.e.  0.0327  0.0616
## 
## sigma^2 estimated as 0.0001206:  log likelihood=1045.64
## AIC=-2085.28   AICc=-2085.21   BIC=-2073.81
# Fit the "optimal" ARIMA(p,d,q) model
# NOTE(review): the search above selected ARIMA(1,0,1) with zero mean, but
# this call keeps arima()'s intercept term (see the printed "intercept"
# coefficient). Pass include.mean = FALSE if the zero-mean model is intended.
arimaOpt <- arima(x = btcCloseLogDiff, order = c(1, 0, 1))
arimaOpt
## 
## Call:
## arima(x = btcCloseLogDiff, order = c(1, 0, 1))
## 
## Coefficients:
##          ar1     ma1  intercept
##       0.8333  0.0937     0.0059
## s.e.  0.0343  0.0621     0.0038
## 
## sigma^2 estimated as 0.0001191:  log likelihood = 1046.72,  aic = -2085.44
# Plot the fitted model object
plot(arimaOpt)

# Forecast using optimal ARIMA
# Within the training dataset
arimaOptFC <- forecast(btcCloseLogDiff, h = 10, model = arimaOpt)
plot(
  btcCloseLogDiff,
  type = "l", col = "lightblue", lwd = 3,
  xlab = "", ylab = ""
)
lines(arimaOptFC$fitted, col = "red", lwd = 2)

# Beyond the training dataset
plot(arimaOptFC)

# Residual series together with its ACF and PACF, up to lag 80
tsdisplay(residuals(arimaOpt), lag.max = 80)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
# Re-plot the differenced log close series, then repeat the order search with
# seasonal models allowed.
# NOTE(review): btcCloseLogDiff derives from ts() called without a frequency,
# so it presumably has frequency 1; auto.arima may then never consider
# seasonal terms, which would explain why the trace below matches the
# non-seasonal run. Confirm before concluding the series is not seasonal.
plot(btcCloseLogDiff, type="l", col = "blue", 
     lwd = 3, xlab = "", ylab = "")

auto.arima(btcCloseLogDiff,                   # the dataset
           seasonal = TRUE,             # seasonality
           stationary = TRUE,           # stationarity
           max.p = 5, max.q = 5,        # range of p,q (non-seasonal)
           max.P = 5, max.Q = 5,        # range of P,Q (seasonal)
           max.d = 2, max.D = 2,        # range of d,D (differences)
           start.p = 1, start.q = 1,    # start for stepwise search
           start.P = 1, start.Q = 1,    # start for stepwise search 
           ic = "aic",                  # criteria to compare
           stepwise = TRUE,             # stepwise selection (faster)
           trace = TRUE)                # all ARIMA models reported
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(1,0,1) with non-zero mean : -2085.794
##  ARIMA(0,0,0) with non-zero mean : -1633.821
##  ARIMA(1,0,0) with non-zero mean : -2085.464
##  ARIMA(0,0,1) with non-zero mean : -1886.411
##  ARIMA(0,0,0) with zero mean     : -1611.426
##  ARIMA(2,0,1) with non-zero mean : -2083.142
##  ARIMA(1,0,2) with non-zero mean : -2084.015
##  ARIMA(2,0,2) with non-zero mean : -2081.276
##  ARIMA(1,0,1) with zero mean     : -2085.801
##  ARIMA(0,0,1) with zero mean     : -1870.209
##  ARIMA(2,0,1) with zero mean     : -2083.185
##  ARIMA(1,0,0) with zero mean     : -2085.731
##  ARIMA(1,0,2) with zero mean     : -2083.954
##  ARIMA(2,0,2) with zero mean     : -2081.274
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(1,0,1) with zero mean     : -2085.278
## 
##  Best model: ARIMA(1,0,1) with zero mean
## Series: btcCloseLogDiff 
## ARIMA(1,0,1) with zero mean 
## 
## Coefficients:
##          ar1     ma1
##       0.8461  0.0876
## s.e.  0.0327  0.0616
## 
## sigma^2 estimated as 0.0001206:  log likelihood=1045.64
## AIC=-2085.28   AICc=-2085.21   BIC=-2073.81
# Not SEASONAL! :D
# Ethereum 
# plot
# Line chart of the daily ETH open price over the filtered window
ggplot(data = eth_sorted_final, mapping = aes(x = date, y = open)) +
  geom_line(mapping = aes(color = "Open Price"))

# Five-number summary plus mean of the ETH open price
summary(object = eth_sorted_final$open)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    41.8   231.7   308.1   432.0   683.3  1397.5
# In December 2017 crypto prices surged at a ridiculous rate.

# This affects the whole time series, as the prices before that huge spike
# appear to follow a model of their own. Also, the prices before, let's say, 6000
# are of little use now. (NOTE: 6000 looks carried over from the Bitcoin
# section; the ETH open price summarised above peaks at about 1400.)

# Restrict the modelling window to 2017-11-15 .. 2018-04-15, then attach
# 7-day and 30-day moving averages of the open and close prices as columns.
eth_latest_model <- eth_sorted_final[
  eth_sorted_final$date >= as.Date("2017-11-15") &
    eth_sorted_final$date <= as.Date("2018-04-15"),
]
eth_latest_model$open_ma <- ma(eth_latest_model$open, order = 7) # Weekly Moving Average
eth_latest_model$open_ma30 <- ma(eth_latest_model$open, order = 30) # Monthly Moving Average

eth_latest_model$close_ma <- ma(eth_latest_model$close, order = 7) # Weekly Moving Average
eth_latest_model$close_ma30 <- ma(eth_latest_model$close, order = 30) # Monthly Moving Average

# Open price with its weekly and monthly moving averages overlaid.
# The 30-day average gets its own legend label; sharing the weekly label
# would give both averages the same colour and a single legend entry.
ggplot() +
  geom_line(data = eth_latest_model, aes(x = date, y = open, color = "open Price")) +
  geom_line(data = eth_latest_model, aes(x = date, y = open_ma, color = "weekly moving average")) +
  geom_line(data = eth_latest_model, aes(x = date, y = open_ma30, color = "monthly moving average"))
## Warning: Removed 6 rows containing missing values (geom_path).
## Warning: Removed 30 rows containing missing values (geom_path).

# ==================================================================================
# Ethereum Open Price
# ==================================================================================

# Weekly-smoothed ETH open series with the NA rows of the moving average
# dropped, wrapped as a time series.
eth_open_ma <- ts(na.omit(eth_latest_model$open_ma))


# From the decomposed data, we can see that the series is very seasonal and
# that the trend is going down.
# NOTE(review): no decomposition step appears earlier in this script --
# confirm where this observation comes from.

# Stationarity
adf.test(eth_open_ma, alternative = "stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  eth_open_ma
## Dickey-Fuller = -0.9951, Lag order = 4, p-value = 0.9356
## alternative hypothesis: stationary
# Stop if p-value < 0.05
# Stationarize the Time Series before fitting ARIMA
# Desirable Stationarity properties are as follows:
# -- Time-independent Variance
# -- Time-independent Mean
# -- Time-independent Autocorrelation

# Log-transform the smoothed ETH open series, plot it, and repeat the ADF test.
ethLog <- log(eth_open_ma)
plot(
  ethLog,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(x = ethLog)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ethLog
## Dickey-Fuller = -0.66433, Lag order = 4, p-value = 0.9713
## alternative hypothesis: stationary
# First-difference the log series, plot it, and repeat the ADF test.
ethLogDiff <- diff(ethLog)
plot(
  ethLogDiff,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(x = ethLogDiff)
## Warning in adf.test(ethLogDiff): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ethLogDiff
## Dickey-Fuller = -4.2742, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# ==========================================================
# ACF and PACF 
# ==========================================================

# Autocorrelation of the differenced log ETH open series.
# The plot titles name ETH, the coin actually being analysed here.
acf(ethLogDiff, main = "ACF For ETH Differenced Series")

# q is 5

# Partial autocorrelation of the same series
pacf(ethLogDiff, main = "PACF For ETH Differenced Series")

# p is 4

# Arima Based on guessing
# d stays 0 because ethLogDiff has already been differenced once.
arimaFit <- arima(ethLogDiff, order = c(4,0,5))
arimaFit            # check the coefficients
## 
## Call:
## arima(x = ethLogDiff, order = c(4, 0, 5))
## 
## Coefficients:
##          ar1     ar2     ar3      ar4     ma1     ma2      ma3      ma4
##       0.2276  0.7698  0.3351  -0.3762  1.0056  0.1205  -0.5684  -0.5570
## s.e.  0.2298  0.1980  0.1830   0.1923  0.2733  0.4945   0.2411   0.2976
##           ma5  intercept
##       -0.5582     0.0035
## s.e.   0.1995     0.0094
## 
## sigma^2 estimated as 0.000136:  log likelihood = 372.19,  aic = -722.37
# Plot the fitted model object
plot(arimaFit)

# Apply the fitted model to the ETH series and forecast 10 steps ahead
arimaFitFC <- forecast(ethLogDiff, h = 10, model = arimaFit)

# Observed series (light blue) with the fitted values (red) drawn on top
plot(
  ethLogDiff,
  type = "l", col = "lightblue", lwd = 3,
  xlab = "", ylab = ""
)
lines(arimaFitFC$fitted, col = "red", lwd = 2)

# Forecast beyond the end of the series
plot(arimaFitFC)

# Residual series together with its ACF and PACF, up to lag 80
tsdisplay(residuals(arimaFit), lag.max = 80)

# The residuals look okay, but the ACF and PACF each have one or two spikes sticking out!
# Find the "optimal" ARIMA parameters

# Stepwise AIC search over ARMA orders (p, q up to 5) for the differenced
# log ETH open series. The selected model is printed, not stored.
# NOTE(review): with seasonal = FALSE the seasonal limits (max.P, max.Q,
# max.D, start.P, start.Q) should have no effect -- confirm against the
# auto.arima documentation.
auto.arima(ethLogDiff,                   # the dataset
           seasonal = FALSE,            # seasonality
           stationary = TRUE,           # stationarity
           max.p = 5, max.q = 5,        # range of p,q (non-seasonal)
           max.P = 5, max.Q = 5,        # range of P,Q (seasonal)
           max.d = 2, max.D = 2,        # range of d,D (differences)
           start.p = 1, start.q = 1,    # start for stepwise search
           start.P = 1, start.Q = 1,    # start for stepwise search 
           ic = "aic",                  # criteria to compare
           stepwise = TRUE,             # stepwise selection (faster)
           trace = TRUE)                # all ARIMA models reported
## 
##  ARIMA(1,0,1) with non-zero mean : -712.6431
##  ARIMA(0,0,0) with non-zero mean : -522.6317
##  ARIMA(1,0,0) with non-zero mean : -713.8077
##  ARIMA(0,0,1) with non-zero mean : -619.4417
##  ARIMA(0,0,0) with zero mean     : -522.556
##  ARIMA(2,0,0) with non-zero mean : -712.8497
##  ARIMA(2,0,1) with non-zero mean : -716.0577
##  ARIMA(2,0,1) with zero mean     : -717.7185
##  ARIMA(1,0,1) with zero mean     : -714.5004
##  ARIMA(3,0,1) with zero mean     : -717.2476
##  ARIMA(2,0,0) with zero mean     : -714.7007
##  ARIMA(2,0,2) with zero mean     : -717.979
##  ARIMA(3,0,3) with zero mean     : Inf
##  ARIMA(2,0,2) with non-zero mean : -716.3345
##  ARIMA(1,0,2) with zero mean     : -713.7408
##  ARIMA(3,0,2) with zero mean     : Inf
##  ARIMA(2,0,3) with zero mean     : -719.6245
##  ARIMA(3,0,4) with zero mean     : -726.2259
##  ARIMA(3,0,4) with non-zero mean : -724.4744
##  ARIMA(2,0,4) with zero mean     : -724.0828
##  ARIMA(4,0,4) with zero mean     : -722.476
##  ARIMA(3,0,5) with zero mean     : -732.9367
##  ARIMA(3,0,5) with non-zero mean : -731.1599
##  ARIMA(2,0,5) with zero mean     : -723.5901
##  ARIMA(4,0,5) with zero mean     : Inf
## 
##  Best model: ARIMA(3,0,5) with zero mean
## Series: ethLogDiff 
## ARIMA(3,0,5) with zero mean 
## 
## Coefficients:
##           ar1     ar2     ar3     ma1     ma2     ma3      ma4      ma5
##       -0.3620  0.3591  0.5579  1.5826  1.2777  0.4287  -0.1296  -0.3796
## s.e.   0.1256  0.0979  0.0888  0.1385  0.2462  0.2507   0.2188   0.1230
## 
## sigma^2 estimated as 0.0001395:  log likelihood=375.47
## AIC=-732.94   AICc=-731.36   BIC=-707.55
# Fit the "optimal" ARIMA(p,d,q) model
# NOTE(review): the search above selected ARIMA(3,0,5) with zero mean, but
# this call keeps arima()'s intercept term (see the printed "intercept"
# coefficient). Pass include.mean = FALSE if the zero-mean model is intended.
arimaOpt <- arima(x = ethLogDiff, order = c(3, 0, 5))
arimaOpt
## 
## Call:
## arima(x = ethLogDiff, order = c(3, 0, 5))
## 
## Coefficients:
##           ar1     ar2     ar3     ma1     ma2     ma3      ma4      ma5
##       -0.3660  0.3544  0.5552  1.5854  1.2832  0.4341  -0.1266  -0.3788
## s.e.   0.1261  0.0990  0.0889  0.1395  0.2479  0.2527   0.2194   0.1230
##       intercept
##          0.0039
## s.e.     0.0082
## 
## sigma^2 estimated as 0.0001302:  log likelihood = 375.58,  aic = -731.16
# Plot the fitted model object
plot(arimaOpt)

# Forecast using optimal ARIMA
# Within the training dataset
arimaOptFC <- forecast(ethLogDiff, model = arimaOpt, h = 10)
plot(ethLogDiff, type = "l", col = "lightblue",
     lwd = 3, xlab = "", ylab = "")
points(arimaOptFC$fitted, type = "l", col = "red", lwd = 2)

# Beyond the training dataset
plot(arimaOptFC)

# Residual diagnostics up to lag 80. lag.max is an argument of tsdisplay(),
# so it must sit outside the residuals() call to take effect.
tsdisplay(residuals(arimaOpt), lag.max = 80)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
# Re-plot the differenced log ETH open series, then repeat the order search
# with seasonal models allowed.
# NOTE(review): ethLogDiff derives from ts() called without a frequency, so
# it presumably has frequency 1; auto.arima may then never consider seasonal
# terms, which would explain why the trace below matches the non-seasonal
# run. Confirm before concluding the series is not seasonal.
plot(ethLogDiff, type="l", col = "blue", 
     lwd = 3, xlab = "", ylab = "")

auto.arima(ethLogDiff,                   # the dataset
           seasonal = TRUE,             # seasonality
           stationary = TRUE,           # stationarity
           max.p = 5, max.q = 5,        # range of p,q (non-seasonal)
           max.P = 5, max.Q = 5,        # range of P,Q (seasonal)
           max.d = 2, max.D = 2,        # range of d,D (differences)
           start.p = 1, start.q = 1,    # start for stepwise search
           start.P = 1, start.Q = 1,    # start for stepwise search 
           ic = "aic",                  # criteria to compare
           stepwise = TRUE,             # stepwise selection (faster)
           trace = TRUE)                # all ARIMA models reported
## 
##  ARIMA(1,0,1) with non-zero mean : -712.6431
##  ARIMA(0,0,0) with non-zero mean : -522.6317
##  ARIMA(1,0,0) with non-zero mean : -713.8077
##  ARIMA(0,0,1) with non-zero mean : -619.4417
##  ARIMA(0,0,0) with zero mean     : -522.556
##  ARIMA(2,0,0) with non-zero mean : -712.8497
##  ARIMA(2,0,1) with non-zero mean : -716.0577
##  ARIMA(2,0,1) with zero mean     : -717.7185
##  ARIMA(1,0,1) with zero mean     : -714.5004
##  ARIMA(3,0,1) with zero mean     : -717.2476
##  ARIMA(2,0,0) with zero mean     : -714.7007
##  ARIMA(2,0,2) with zero mean     : -717.979
##  ARIMA(3,0,3) with zero mean     : Inf
##  ARIMA(2,0,2) with non-zero mean : -716.3345
##  ARIMA(1,0,2) with zero mean     : -713.7408
##  ARIMA(3,0,2) with zero mean     : Inf
##  ARIMA(2,0,3) with zero mean     : -719.6245
##  ARIMA(3,0,4) with zero mean     : -726.2259
##  ARIMA(3,0,4) with non-zero mean : -724.4744
##  ARIMA(2,0,4) with zero mean     : -724.0828
##  ARIMA(4,0,4) with zero mean     : -722.476
##  ARIMA(3,0,5) with zero mean     : -732.9367
##  ARIMA(3,0,5) with non-zero mean : -731.1599
##  ARIMA(2,0,5) with zero mean     : -723.5901
##  ARIMA(4,0,5) with zero mean     : Inf
## 
##  Best model: ARIMA(3,0,5) with zero mean
## Series: ethLogDiff 
## ARIMA(3,0,5) with zero mean 
## 
## Coefficients:
##           ar1     ar2     ar3     ma1     ma2     ma3      ma4      ma5
##       -0.3620  0.3591  0.5579  1.5826  1.2777  0.4287  -0.1296  -0.3796
## s.e.   0.1256  0.0979  0.0888  0.1385  0.2462  0.2507   0.2188   0.1230
## 
## sigma^2 estimated as 0.0001395:  log likelihood=375.47
## AIC=-732.94   AICc=-731.36   BIC=-707.55
# It is not seasonal! 

# ============================================================================
# Predict ETHEREUM Close Price  
# ============================================================================

# Weekly-smoothed ETH close series with the NA rows of the moving average dropped
eth_close_ma <- ts(data = na.omit(eth_latest_model$close_ma))

# Stationarity
adf.test(x = eth_close_ma, alternative = "stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  eth_close_ma
## Dickey-Fuller = -1.1052, Lag order = 4, p-value = 0.9183
## alternative hypothesis: stationary
# Stop if p-value < 0.05

# Log-transform the smoothed ETH close series, plot it, and repeat the ADF test.
ethCloseLog <- log(eth_close_ma)
plot(
  ethCloseLog,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(x = ethCloseLog)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ethCloseLog
## Dickey-Fuller = -0.87351, Lag order = 4, p-value = 0.9527
## alternative hypothesis: stationary
# Stop if p-value < 0.05

# First-difference the log close series, plot it, and repeat the ADF test.
ethCloseLogDiff <- diff(ethCloseLog)
plot(
  ethCloseLogDiff,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(x = ethCloseLogDiff)
## Warning in adf.test(ethCloseLogDiff): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ethCloseLogDiff
## Dickey-Fuller = -4.2208, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
# Stop if p-value < 0.05

# ==========================================================
# ACF and PACF 
# ==========================================================

# Autocorrelation of the differenced log ETH close series.
# The plot titles name ETH, the coin actually being analysed here.
acf(ethCloseLogDiff, main = "ACF For ETH Differenced Series")

# q is 2

# Partial autocorrelation of the same series
pacf(ethCloseLogDiff, main = "PACF For ETH Differenced Series")

# p is 4

# Arima Based on guessing
# d stays 0 because ethCloseLogDiff has already been differenced once.
arimaFit <- arima(ethCloseLogDiff, order = c(4,0,2))
arimaFit            # check the coefficients
## 
## Call:
## arima(x = ethCloseLogDiff, order = c(4, 0, 2))
## 
## Coefficients:
##          ar1     ar2     ar3      ar4     ma1     ma2  intercept
##       0.0576  0.1352  0.7262  -0.2832  0.9489  0.7566     0.0038
## s.e.  0.3105  0.0970  0.1293   0.1253  0.3478  0.1561     0.0081
## 
## sigma^2 estimated as 0.0001577:  log likelihood = 365.67,  aic = -715.34
plot(arimaFit)

arimaFitFC <- forecast(ethCloseLogDiff, model = arimaFit, h = 10)
plot(ethCloseLogDiff, type="l", col = "lightblue", 
     lwd = 3, xlab = "", ylab = "")
points(arimaFitFC$fitted, type="l", col = "red", lwd = 2)

plot(arimaFitFC)

tsdisplay(residuals(arimaFit), lag.max = 80)

# Find the "optimal" ARIMA parameters
# Stepwise AIC search over non-seasonal orders with p and q up to 5. The input
# is already differenced, so the search is restricted to stationary models.
# The seasonal limits (P, Q, D) are passed for symmetry with the seasonal run.
auto.arima(
  ethCloseLogDiff,                # the dataset
  seasonal   = FALSE,             # non-seasonal models only
  stationary = TRUE,              # stationary models only
  ic         = "aic",             # criterion used to compare models
  stepwise   = TRUE,              # stepwise selection (faster)
  trace      = TRUE,              # report every model tried
  start.p = 1, max.p = 5,         # AR order: start value and upper bound
  start.q = 1, max.q = 5,         # MA order: start value and upper bound
  start.P = 1, max.P = 5,         # seasonal AR order
  start.Q = 1, max.Q = 5,         # seasonal MA order
  max.d = 2, max.D = 2            # upper bounds on differencing
)
## 
##  ARIMA(1,0,1) with non-zero mean : -706.5708
##  ARIMA(0,0,0) with non-zero mean : -522.1272
##  ARIMA(1,0,0) with non-zero mean : -708.0859
##  ARIMA(0,0,1) with non-zero mean : -617.4429
##  ARIMA(0,0,0) with zero mean     : -522.1977
##  ARIMA(2,0,0) with non-zero mean : -706.6674
##  ARIMA(2,0,1) with non-zero mean : -709.0727
##  ARIMA(2,0,1) with zero mean     : -710.7205
##  ARIMA(1,0,1) with zero mean     : -708.3808
##  ARIMA(3,0,1) with zero mean     : -710.3137
##  ARIMA(2,0,0) with zero mean     : -708.4748
##  ARIMA(2,0,2) with zero mean     : -711.176
##  ARIMA(3,0,3) with zero mean     : -718.3065
##  ARIMA(3,0,3) with non-zero mean : -716.514
##  ARIMA(2,0,3) with zero mean     : -713.25
##  ARIMA(4,0,3) with zero mean     : -719.4527
##  ARIMA(4,0,2) with zero mean     : -717.1257
##  ARIMA(4,0,4) with zero mean     : -716.2337
##  ARIMA(3,0,2) with zero mean     : Inf
##  ARIMA(5,0,4) with zero mean     : -733.8654
##  ARIMA(5,0,4) with non-zero mean : -732.0501
##  ARIMA(5,0,3) with zero mean     : -718.2465
##  ARIMA(5,0,5) with zero mean     : -731.9022
## 
##  Best model: ARIMA(5,0,4) with zero mean
## Series: ethCloseLogDiff 
## ARIMA(5,0,4) with zero mean 
## 
## Coefficients:
##           ar1      ar2     ar3     ar4     ar5     ma1     ma2     ma3
##       -1.1183  -0.2692  0.6103  0.6209  0.1395  2.2771  2.6961  1.9560
## s.e.   0.1363   0.1628  0.1302  0.1638  0.1199  0.0971  0.1800  0.1758
##          ma4
##       0.7806
## s.e.  0.0854
## 
## sigma^2 estimated as 0.0001384:  log likelihood=376.93
## AIC=-733.87   AICc=-731.92   BIC=-705.66
# Fit the "optimal" ARIMA(p,d,q) model
# auto.arima() reported ARIMA(5,0,4) as the best model (AIC = -733.87).
# This step previously fitted ARIMA(1,0,1), which is only the first candidate
# listed in the search trace (AIC = -706.57), not the selected model.
# auto.arima() chose the zero-mean variant; the intercept is left at arima()'s
# default here, as in the other sections. Pass include.mean = FALSE to match
# the selected model exactly.
arimaOpt <- arima(ethCloseLogDiff, order = c(5, 0, 4))
arimaOpt
# NOTE: the console output previously recorded here belonged to the
# ARIMA(1,0,1) fit and no longer applies; re-knit to regenerate it.
plot(arimaOpt)

# Forecast using optimal ARIMA
# Within the training dataset: observed series with fitted values on top
arimaOptFC <- forecast(ethCloseLogDiff, model = arimaOpt, h = 10)
plot(ethCloseLogDiff, type = "l", col = "lightblue",
     lwd = 3, xlab = "", ylab = "")
points(arimaOptFC$fitted, type = "l", col = "red", lwd = 2)

# Beyond the training dataset: 10-step-ahead forecast
plot(arimaOptFC)

# Residual diagnostics up to lag 80
tsdisplay(residuals(arimaOpt), lag.max = 80)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
# NOTE(review): eth_close_ma is built with ts() without a frequency, so the
# series presumably has frequency 1 and no seasonal period to search over.
# That would explain why the recorded trace matches the non-seasonal run.
plot(ethCloseLogDiff, xlab = "", ylab = "",
     type = "l", lwd = 3, col = "blue")

auto.arima(
  ethCloseLogDiff,                # the dataset
  seasonal   = TRUE,              # allow seasonal terms
  stationary = TRUE,              # stationary models only
  ic         = "aic",             # criterion used to compare models
  stepwise   = TRUE,              # stepwise selection (faster)
  trace      = TRUE,              # report every model tried
  start.p = 1, max.p = 5,         # AR order: start value and upper bound
  start.q = 1, max.q = 5,         # MA order: start value and upper bound
  start.P = 1, max.P = 5,         # seasonal AR order
  start.Q = 1, max.Q = 5,         # seasonal MA order
  max.d = 2, max.D = 2            # upper bounds on differencing
)
## 
##  ARIMA(1,0,1) with non-zero mean : -706.5708
##  ARIMA(0,0,0) with non-zero mean : -522.1272
##  ARIMA(1,0,0) with non-zero mean : -708.0859
##  ARIMA(0,0,1) with non-zero mean : -617.4429
##  ARIMA(0,0,0) with zero mean     : -522.1977
##  ARIMA(2,0,0) with non-zero mean : -706.6674
##  ARIMA(2,0,1) with non-zero mean : -709.0727
##  ARIMA(2,0,1) with zero mean     : -710.7205
##  ARIMA(1,0,1) with zero mean     : -708.3808
##  ARIMA(3,0,1) with zero mean     : -710.3137
##  ARIMA(2,0,0) with zero mean     : -708.4748
##  ARIMA(2,0,2) with zero mean     : -711.176
##  ARIMA(3,0,3) with zero mean     : -718.3065
##  ARIMA(3,0,3) with non-zero mean : -716.514
##  ARIMA(2,0,3) with zero mean     : -713.25
##  ARIMA(4,0,3) with zero mean     : -719.4527
##  ARIMA(4,0,2) with zero mean     : -717.1257
##  ARIMA(4,0,4) with zero mean     : -716.2337
##  ARIMA(3,0,2) with zero mean     : Inf
##  ARIMA(5,0,4) with zero mean     : -733.8654
##  ARIMA(5,0,4) with non-zero mean : -732.0501
##  ARIMA(5,0,3) with zero mean     : -718.2465
##  ARIMA(5,0,5) with zero mean     : -731.9022
## 
##  Best model: ARIMA(5,0,4) with zero mean
## Series: ethCloseLogDiff 
## ARIMA(5,0,4) with zero mean 
## 
## Coefficients:
##           ar1      ar2     ar3     ar4     ar5     ma1     ma2     ma3
##       -1.1183  -0.2692  0.6103  0.6209  0.1395  2.2771  2.6961  1.9560
## s.e.   0.1363   0.1628  0.1302  0.1638  0.1199  0.0971  0.1800  0.1758
##          ma4
##       0.7806
## s.e.  0.0854
## 
## sigma^2 estimated as 0.0001384:  log likelihood=376.93
## AIC=-733.87   AICc=-731.92   BIC=-705.66
# Not SEASONAL! :D
# Ripple
# Plot the full open-price history
ggplot() +
  geom_line(data = xrp_sorted_final,
            aes(x = date, y = open, color = "Open Price"))

summary(xrp_sorted_final$open)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.02174 0.19240 0.24285 0.49641 0.72055 3.36000
# Crypto prices surged at an extraordinary rate in December 2017.

# That spike affects the whole time series: the prices before it look like
# they follow a model of their own. Prices from long before the spike are of
# little use now, so only the year 2017-04-15 to 2018-04-15 is kept.

xrp_latest_model <- xrp_sorted_final[
  xrp_sorted_final$date >= "2017-04-15" &
    xrp_sorted_final$date <= "2018-04-15",
]
xrp_latest_model$open_ma <- ma(xrp_latest_model$open, order = 7)     # Weekly Moving Average
xrp_latest_model$open_ma30 <- ma(xrp_latest_model$open, order = 30)  # Monthly Moving Average

xrp_latest_model$close_ma <- ma(xrp_latest_model$close, order = 7)
xrp_latest_model$close_ma30 <- ma(xrp_latest_model$close, order = 30)


# Open price with both moving averages. The 30-day line now has its own
# legend key; it previously reused the "weekly moving average" label, so the
# two averages shared one colour and one legend entry.
ggplot() +
  geom_line(data = xrp_latest_model,
            aes(x = date, y = open, color = "open Price")) +
  geom_line(data = xrp_latest_model,
            aes(x = date, y = open_ma, color = "weekly moving average")) +
  geom_line(data = xrp_latest_model,
            aes(x = date, y = open_ma30, color = "monthly moving average"))
## Warning: Removed 6 rows containing missing values (geom_path).
## Warning: Removed 30 rows containing missing values (geom_path).

# ==================================================================================
# Ripple Open Price
# ==================================================================================

# Open-price series for the Ripple model: the 7-day moving average with the
# NA entries dropped, wrapped as a ts object.
xrp_open_ma <- ts(na.omit(xrp_latest_model[["open_ma"]]))

# Author's note: from the decomposed data it looks very seasonal, and the
# trend is going down.

# Step 1 - ADF test on the raw series
adf.test(xrp_open_ma, alternative = "stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  xrp_open_ma
## Dickey-Fuller = -2.0655, Lag order = 6, p-value = 0.549
## alternative hypothesis: stationary
# p-value 0.549 is above 0.05, so keep transforming
# (the procedure stops once the p-value drops below 0.05).

# Step 2 - log transform, then test again
xrpLog <- log(xrp_open_ma)
plot(xrpLog, xlab = "", ylab = "",
     type = "l", lwd = 3, col = "blue")

adf.test(xrpLog)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  xrpLog
## Dickey-Fuller = -2.8887, Lag order = 6, p-value = 0.2018
## alternative hypothesis: stationary
# Step 3 - first difference of the log series, then test again
xrpLogDiff <- diff(xrpLog)
plot(xrpLogDiff, xlab = "", ylab = "",
     type = "l", lwd = 3, col = "blue")

adf.test(xrpLogDiff)
## Warning in adf.test(xrpLogDiff): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  xrpLogDiff
## Dickey-Fuller = -5.4103, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# ==========================================================
# ACF and PACF 
# ==========================================================

# Correlograms of the differenced log open series for Ripple.
# The plot titles previously said "BTC" (left over from the Bitcoin section).
acf(xrpLogDiff, main = "ACF For XRP Differenced Series")

# q is 1

pacf(xrpLogDiff, main = "PACF For XRP Differenced Series")

# p is 3

# Hand-picked model: ARIMA(3,0,1) on the differenced log open series,
# using the p = 3 / q = 1 readings noted next to the PACF/ACF plots.
arimaFit <- arima(x = xrpLogDiff, order = c(3, 0, 1))
arimaFit            # check the coefficients
## 
## Call:
## arima(x = xrpLogDiff, order = c(3, 0, 1))
## 
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
##          ar1     ar2     ar3     ma1  intercept
##       0.5391  0.3065  0.0048  0.3382     0.0085
## s.e.     NaN     NaN     NaN     NaN     0.0102
## 
## sigma^2 estimated as 0.0004648:  log likelihood = 816.52,  aic = -1621.05
# The AR/MA standard errors are NaN in the recorded output, so the precision
# of those coefficients cannot be judged from this fit.
plot(arimaFit)

# Apply the fitted model to the series and forecast 10 steps ahead
arimaFitFC <- forecast(xrpLogDiff, model = arimaFit, h = 10)

# Observed series (light blue) with the in-sample fitted values on top (red)
plot(xrpLogDiff, xlab = "", ylab = "",
     type = "l", lwd = 3, col = "lightblue")
lines(arimaFitFC$fitted, col = "red", lwd = 2)

# Forecast plot
plot(arimaFitFC)

# Residual diagnostics up to lag 80
tsdisplay(residuals(arimaFit), lag.max = 80)

# The residuals look okay, but the ACF and PACF each have one or two spikes outside the bounds.
# Find the "optimal" ARIMA parameters
# Stepwise AIC search over non-seasonal orders with p and q up to 5. The input
# is already differenced, so the search is restricted to stationary models.
# The seasonal limits (P, Q, D) are passed for symmetry with the seasonal run.
auto.arima(
  xrpLogDiff,                     # the dataset
  seasonal   = FALSE,             # non-seasonal models only
  stationary = TRUE,              # stationary models only
  ic         = "aic",             # criterion used to compare models
  stepwise   = TRUE,              # stepwise selection (faster)
  trace      = TRUE,              # report every model tried
  start.p = 1, max.p = 5,         # AR order: start value and upper bound
  start.q = 1, max.q = 5,         # MA order: start value and upper bound
  start.P = 1, max.P = 5,         # seasonal AR order
  start.Q = 1, max.Q = 5,         # seasonal MA order
  max.d = 2, max.D = 2            # upper bounds on differencing
)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(1,0,1) with non-zero mean : -1625.647
##  ARIMA(0,0,0) with non-zero mean : -1101.663
##  ARIMA(1,0,0) with non-zero mean : -1627.609
##  ARIMA(0,0,1) with non-zero mean : -1361.615
##  ARIMA(0,0,0) with zero mean     : -1091.572
##  ARIMA(2,0,0) with non-zero mean : -1624.791
##  ARIMA(2,0,1) with non-zero mean : -1622.886
##  ARIMA(1,0,0) with zero mean     : -1628.901
##  ARIMA(2,0,0) with zero mean     : -1626.066
##  ARIMA(1,0,1) with zero mean     : -1626.954
##  ARIMA(2,0,1) with zero mean     : -1624.24
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(1,0,0) with zero mean     : -1628.314
## 
##  Best model: ARIMA(1,0,0) with zero mean
## Series: xrpLogDiff 
## ARIMA(1,0,0) with zero mean 
## 
## Coefficients:
##          ar1
##       0.8906
## s.e.  0.0241
## 
## sigma^2 estimated as 0.0004671:  log likelihood=816.16
## AIC=-1628.31   AICc=-1628.28   BIC=-1620.67
# Fit the "optimal" ARIMA(p,d,q) model
# Orders (1,0,0) are the ones auto.arima() selected. The intercept is left at
# arima()'s default, although auto.arima() chose the zero-mean variant.
arimaOpt <- arima(xrpLogDiff, order = c(1, 0, 0))
arimaOpt
## 
## Call:
## arima(x = xrpLogDiff, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.8868     0.0087
## s.e.  0.0246     0.0101
## 
## sigma^2 estimated as 0.0004648:  log likelihood = 816.5,  aic = -1627.01
plot(arimaOpt)

# Forecast using optimal ARIMA
# Within the training dataset: observed series with fitted values on top
arimaOptFC <- forecast(xrpLogDiff, model = arimaOpt, h = 10)
plot(xrpLogDiff, type = "l", col = "lightblue",
     lwd = 3, xlab = "", ylab = "")
points(arimaOptFC$fitted, type = "l", col = "red", lwd = 2)

# Beyond the training dataset: 10-step-ahead forecast
plot(arimaOptFC)

# Residual diagnostics up to lag 80. lag.max belongs to tsdisplay(); a
# misplaced parenthesis used to pass it to residuals() instead, so the
# display did not use the requested number of lags.
tsdisplay(residuals(arimaOpt), lag.max = 80)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
# NOTE(review): xrp_open_ma is built with ts() without a frequency, so the
# series presumably has frequency 1 and no seasonal period to search over.
# That would explain why the recorded trace matches the non-seasonal run.
plot(xrpLogDiff, xlab = "", ylab = "",
     type = "l", lwd = 3, col = "blue")

auto.arima(
  xrpLogDiff,                     # the dataset
  seasonal   = TRUE,              # allow seasonal terms
  stationary = TRUE,              # stationary models only
  ic         = "aic",             # criterion used to compare models
  stepwise   = TRUE,              # stepwise selection (faster)
  trace      = TRUE,              # report every model tried
  start.p = 1, max.p = 5,         # AR order: start value and upper bound
  start.q = 1, max.q = 5,         # MA order: start value and upper bound
  start.P = 1, max.P = 5,         # seasonal AR order
  start.Q = 1, max.Q = 5,         # seasonal MA order
  max.d = 2, max.D = 2            # upper bounds on differencing
)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(1,0,1) with non-zero mean : -1625.647
##  ARIMA(0,0,0) with non-zero mean : -1101.663
##  ARIMA(1,0,0) with non-zero mean : -1627.609
##  ARIMA(0,0,1) with non-zero mean : -1361.615
##  ARIMA(0,0,0) with zero mean     : -1091.572
##  ARIMA(2,0,0) with non-zero mean : -1624.791
##  ARIMA(2,0,1) with non-zero mean : -1622.886
##  ARIMA(1,0,0) with zero mean     : -1628.901
##  ARIMA(2,0,0) with zero mean     : -1626.066
##  ARIMA(1,0,1) with zero mean     : -1626.954
##  ARIMA(2,0,1) with zero mean     : -1624.24
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(1,0,0) with zero mean     : -1628.314
## 
##  Best model: ARIMA(1,0,0) with zero mean
## Series: xrpLogDiff 
## ARIMA(1,0,0) with zero mean 
## 
## Coefficients:
##          ar1
##       0.8906
## s.e.  0.0241
## 
## sigma^2 estimated as 0.0004671:  log likelihood=816.16
## AIC=-1628.31   AICc=-1628.28   BIC=-1620.67
# It is not seasonal! 

# ==================================================================================
# Ripple Close Price
# ==================================================================================

# Close-price series for the Ripple model: the 7-day moving average with the
# NA entries dropped, wrapped as a ts object.
xrp_close_ma <- ts(na.omit(xrp_latest_model[["close_ma"]]))

# Step 1 - ADF test on the raw series
adf.test(xrp_close_ma, alternative = "stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  xrp_close_ma
## Dickey-Fuller = -2.0289, Lag order = 6, p-value = 0.5645
## alternative hypothesis: stationary
# p-value 0.5645 is above 0.05, so keep transforming
# (the procedure stops once the p-value drops below 0.05).

# Step 2 - log transform, then test again
xrpCloseLog <- log(xrp_close_ma)
plot(xrpCloseLog, xlab = "", ylab = "",
     type = "l", lwd = 3, col = "blue")

adf.test(xrpCloseLog)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  xrpCloseLog
## Dickey-Fuller = -2.8636, Lag order = 6, p-value = 0.2124
## alternative hypothesis: stationary
# p-value 0.2124 is still above 0.05, so difference the log series.

# Step 3 - first difference of the log series, then test again
xrpCloseLogDiff <- diff(xrpCloseLog)
plot(xrpCloseLogDiff, xlab = "", ylab = "",
     type = "l", lwd = 3, col = "blue")

adf.test(xrpCloseLogDiff)
## Warning in adf.test(xrpCloseLogDiff): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  xrpCloseLogDiff
## Dickey-Fuller = -5.3281, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# p-value is below 0.05: stop here and model xrpCloseLogDiff.

# ==========================================================
# ACF and PACF 
# ==========================================================

# Correlograms of the differenced log close series for Ripple.
# The plot titles previously said "BTC" (left over from the Bitcoin section).
acf(xrpCloseLogDiff, main = "ACF For XRP Differenced Series")

# q is 2

pacf(xrpCloseLogDiff, main = "PACF For XRP Differenced Series")

# p is 4

# Hand-picked model: ARIMA(4,0,2) on the differenced log close series,
# using the p = 4 / q = 2 readings noted next to the PACF/ACF plots.
arimaFit <- arima(x = xrpCloseLogDiff, order = c(4, 0, 2))
arimaFit            # check the coefficients
## 
## Call:
## arima(x = xrpCloseLogDiff, order = c(4, 0, 2))
## 
## Coefficients:
##          ar1      ar2     ar3      ar4     ma1     ma2  intercept
##       0.4287  -0.3895  0.7035  -0.0146  0.4708  0.9837     0.0081
## s.e.  0.0555   0.0506  0.0507   0.0553  0.0124  0.0191     0.0098
## 
## sigma^2 estimated as 0.0004165:  log likelihood = 833.56,  aic = -1651.11
plot(arimaFit)

# Apply the fitted model to the series and forecast 10 steps ahead
arimaFitFC <- forecast(xrpCloseLogDiff, model = arimaFit, h = 10)

# Observed series (light blue) with the in-sample fitted values on top (red)
plot(xrpCloseLogDiff, xlab = "", ylab = "",
     type = "l", lwd = 3, col = "lightblue")
lines(arimaFitFC$fitted, col = "red", lwd = 2)

# Forecast plot
plot(arimaFitFC)

# Residual diagnostics up to lag 80
tsdisplay(residuals(arimaFit), lag.max = 80)

# Find the "optimal" ARIMA parameters
# Stepwise AIC search over non-seasonal orders with p and q up to 5. The input
# is already differenced, so the search is restricted to stationary models.
# The seasonal limits (P, Q, D) are passed for symmetry with the seasonal run.
auto.arima(
  xrpCloseLogDiff,                # the dataset
  seasonal   = FALSE,             # non-seasonal models only
  stationary = TRUE,              # stationary models only
  ic         = "aic",             # criterion used to compare models
  stepwise   = TRUE,              # stepwise selection (faster)
  trace      = TRUE,              # report every model tried
  start.p = 1, max.p = 5,         # AR order: start value and upper bound
  start.q = 1, max.q = 5,         # MA order: start value and upper bound
  start.P = 1, max.P = 5,         # seasonal AR order
  start.Q = 1, max.Q = 5,         # seasonal MA order
  max.d = 2, max.D = 2            # upper bounds on differencing
)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(1,0,1) with non-zero mean : -1629.284
##  ARIMA(0,0,0) with non-zero mean : -1101.987
##  ARIMA(1,0,0) with non-zero mean : -1631.225
##  ARIMA(0,0,1) with non-zero mean : -1360.635
##  ARIMA(0,0,0) with zero mean     : -1091.906
##  ARIMA(2,0,0) with non-zero mean : -1628.296
##  ARIMA(2,0,1) with non-zero mean : Inf
##  ARIMA(1,0,0) with zero mean     : -1632.507
##  ARIMA(2,0,0) with zero mean     : -1629.591
##  ARIMA(1,0,1) with zero mean     : -1630.585
##  ARIMA(2,0,1) with zero mean     : -1628.274
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(1,0,0) with zero mean     : -1631.87
## 
##  Best model: ARIMA(1,0,0) with zero mean
## Series: xrpCloseLogDiff 
## ARIMA(1,0,0) with zero mean 
## 
## Coefficients:
##          ar1
##       0.8918
## s.e.  0.0240
## 
## sigma^2 estimated as 0.0004622:  log likelihood=817.94
## AIC=-1631.87   AICc=-1631.83   BIC=-1624.22
# Fit the "optimal" ARIMA(p,d,q) model
# auto.arima() reported ARIMA(1,0,0) as the best model (AIC = -1631.87).
# This step previously fitted ARIMA(1,0,1), which is only the first candidate
# listed in the search trace (its recorded fit had aic = -1628.57).
# auto.arima() chose the zero-mean variant; the intercept is left at arima()'s
# default here, as in the other sections. Pass include.mean = FALSE to match
# the selected model exactly.
arimaOpt <- arima(xrpCloseLogDiff, order = c(1, 0, 0))
arimaOpt
# NOTE: the console output previously recorded here belonged to the
# ARIMA(1,0,1) fit and no longer applies; re-knit to regenerate it.
plot(arimaOpt)

# Forecast using optimal ARIMA
# Within the training dataset: observed series with fitted values on top
arimaOptFC <- forecast(xrpCloseLogDiff, model = arimaOpt, h = 10)
plot(xrpCloseLogDiff, type = "l", col = "lightblue",
     lwd = 3, xlab = "", ylab = "")
points(arimaOptFC$fitted, type = "l", col = "red", lwd = 2)

# Beyond the training dataset: 10-step-ahead forecast
plot(arimaOptFC)

# Residual diagnostics up to lag 80
tsdisplay(residuals(arimaOpt), lag.max = 80)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
# NOTE(review): xrp_close_ma is built with ts() without a frequency, so the
# series presumably has frequency 1 and no seasonal period to search over.
# That would explain why the recorded trace matches the non-seasonal run.
plot(xrpCloseLogDiff, xlab = "", ylab = "",
     type = "l", lwd = 3, col = "blue")

auto.arima(
  xrpCloseLogDiff,                # the dataset
  seasonal   = TRUE,              # allow seasonal terms
  stationary = TRUE,              # stationary models only
  ic         = "aic",             # criterion used to compare models
  stepwise   = TRUE,              # stepwise selection (faster)
  trace      = TRUE,              # report every model tried
  start.p = 1, max.p = 5,         # AR order: start value and upper bound
  start.q = 1, max.q = 5,         # MA order: start value and upper bound
  start.P = 1, max.P = 5,         # seasonal AR order
  start.Q = 1, max.Q = 5,         # seasonal MA order
  max.d = 2, max.D = 2            # upper bounds on differencing
)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(1,0,1) with non-zero mean : -1629.284
##  ARIMA(0,0,0) with non-zero mean : -1101.987
##  ARIMA(1,0,0) with non-zero mean : -1631.225
##  ARIMA(0,0,1) with non-zero mean : -1360.635
##  ARIMA(0,0,0) with zero mean     : -1091.906
##  ARIMA(2,0,0) with non-zero mean : -1628.296
##  ARIMA(2,0,1) with non-zero mean : Inf
##  ARIMA(1,0,0) with zero mean     : -1632.507
##  ARIMA(2,0,0) with zero mean     : -1629.591
##  ARIMA(1,0,1) with zero mean     : -1630.585
##  ARIMA(2,0,1) with zero mean     : -1628.274
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(1,0,0) with zero mean     : -1631.87
## 
##  Best model: ARIMA(1,0,0) with zero mean
## Series: xrpCloseLogDiff 
## ARIMA(1,0,0) with zero mean 
## 
## Coefficients:
##          ar1
##       0.8918
## s.e.  0.0240
## 
## sigma^2 estimated as 0.0004622:  log likelihood=817.94
## AIC=-1631.87   AICc=-1631.83   BIC=-1624.22
# Not SEASONAL! :D
# Bitcoin Cash
# Plot the full open-price history
ggplot() +
  geom_line(data = bch_sorted_final,
            aes(x = date, y = open, color = "Open Price"))

summary(bch_sorted_final$open)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   212.2   439.5  1002.1  1106.7  1489.2  3909.0
# Crypto prices surged at an extraordinary rate in December 2017.

# That spike affects the whole time series: the prices before it look like
# they follow a model of their own. Prices from long before the spike are of
# little use now, so only the window 2017-04-15 to 2018-04-15 is kept.

bch_latest_model <- bch_sorted_final[
  bch_sorted_final$date >= "2017-04-15" &
    bch_sorted_final$date <= "2018-04-15",
]
bch_latest_model$open_ma <- ma(bch_latest_model$open, order = 7)     # Weekly Moving Average
bch_latest_model$open_ma30 <- ma(bch_latest_model$open, order = 30)  # Monthly Moving Average

bch_latest_model$close_ma <- ma(bch_latest_model$close, order = 7)     # Weekly Moving Average
bch_latest_model$close_ma30 <- ma(bch_latest_model$close, order = 30)  # Monthly Moving Average

# Open price with both moving averages. The 30-day line now has its own
# legend key; it previously reused the "weekly moving average" label, so the
# two averages shared one colour and one legend entry.
ggplot() +
  geom_line(data = bch_latest_model,
            aes(x = date, y = open, color = "open Price")) +
  geom_line(data = bch_latest_model,
            aes(x = date, y = open_ma, color = "weekly moving average")) +
  geom_line(data = bch_latest_model,
            aes(x = date, y = open_ma30, color = "monthly moving average"))
## Warning: Removed 6 rows containing missing values (geom_path).
## Warning: Removed 30 rows containing missing values (geom_path).

# Open-price series for the Bitcoin Cash model: the 7-day moving average with
# the NA entries dropped, wrapped as a ts object.
bch_open_ma <- ts(na.omit(bch_latest_model[["open_ma"]]))

# Author's note: from the decomposed data it looks very seasonal, and the
# trend is going down.

# Step 1 - ADF test on the raw series
adf.test(bch_open_ma, alternative = "stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  bch_open_ma
## Dickey-Fuller = -1.1542, Lag order = 6, p-value = 0.9121
## alternative hypothesis: stationary
# p-value 0.9121 is above 0.05, so keep transforming
# (the procedure stops once the p-value drops below 0.05).

# Step 2 - log transform, then test again
bchLog <- log(bch_open_ma)
plot(bchLog, xlab = "", ylab = "",
     type = "l", lwd = 3, col = "blue")

adf.test(bchLog)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  bchLog
## Dickey-Fuller = -1.3871, Lag order = 6, p-value = 0.8336
## alternative hypothesis: stationary
# Step 3 - first difference of the log series, then test again
bchLogDiff <- diff(bchLog)
plot(bchLogDiff, xlab = "", ylab = "",
     type = "l", lwd = 3, col = "blue")

adf.test(bchLogDiff)
## Warning in adf.test(bchLogDiff): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  bchLogDiff
## Dickey-Fuller = -5.7218, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# ==========================================================
# ACF and PACF 
# ==========================================================

# Correlograms of the differenced log open series for Bitcoin Cash.
# The plot titles previously said "BTC" (left over from the Bitcoin section).
acf(bchLogDiff, main = "ACF For BCH Differenced Series")

# q is 1

pacf(bchLogDiff, main = "PACF For BCH Differenced Series")

# p is 4

# Hand-picked model: ARIMA(3,0,3) on the differenced log open series.
# NOTE(review): the readings noted next to the ACF/PACF plots are q = 1 and
# p = 4, which do not match the (3,0,3) used here - confirm the intent.
arimaFit <- arima(x = bchLogDiff, order = c(3, 0, 3))
arimaFit            # check the coefficients
## 
## Call:
## arima(x = bchLogDiff, order = c(3, 0, 3))
## 
## Coefficients:
##           ar1     ar2     ar3     ma1     ma2      ma3  intercept
##       -0.5907  0.5417  0.4493  1.7654  0.9148  -0.0469     0.0023
## s.e.   0.1196  0.0775  0.0689  0.1306  0.2337   0.1279     0.0080
## 
## sigma^2 estimated as 0.0004321:  log likelihood = 582.48,  aic = -1148.96
plot(arimaFit)

# Apply the fitted model to the series and forecast 10 steps ahead
arimaFitFC <- forecast(bchLogDiff, model = arimaFit, h = 10)

# Observed series (light blue) with the in-sample fitted values on top (red)
plot(bchLogDiff, xlab = "", ylab = "",
     type = "l", lwd = 3, col = "lightblue")
lines(arimaFitFC$fitted, col = "red", lwd = 2)

# Forecast plot
plot(arimaFitFC)

# Residual diagnostics up to lag 80
tsdisplay(residuals(arimaFit), lag.max = 80)

# The residuals look okay, but the ACF and PACF each have one or two spikes outside the bounds.
# Find the "optimal" ARIMA parameters
# Stepwise AIC search over non-seasonal orders with p and q up to 5. The input
# is already differenced, so the search is restricted to stationary models.
# The seasonal limits (P, Q, D) are passed for symmetry with the seasonal run.
auto.arima(
  bchLogDiff,                     # the dataset
  seasonal   = FALSE,             # non-seasonal models only
  stationary = TRUE,              # stationary models only
  ic         = "aic",             # criterion used to compare models
  stepwise   = TRUE,              # stepwise selection (faster)
  trace      = TRUE,              # report every model tried
  start.p = 1, max.p = 5,         # AR order: start value and upper bound
  start.q = 1, max.q = 5,         # MA order: start value and upper bound
  start.P = 1, max.P = 5,         # seasonal AR order
  start.Q = 1, max.Q = 5,         # seasonal MA order
  max.d = 2, max.D = 2            # upper bounds on differencing
)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(1,0,1) with non-zero mean : -1122.439
##  ARIMA(0,0,0) with non-zero mean : -836.2027
##  ARIMA(1,0,0) with non-zero mean : -1114.798
##  ARIMA(0,0,1) with non-zero mean : -1003.544
##  ARIMA(0,0,0) with zero mean     : -836.4944
##  ARIMA(2,0,1) with non-zero mean : -1121.726
##  ARIMA(1,0,2) with non-zero mean : -1120.511
##  ARIMA(2,0,2) with non-zero mean : -1130.211
##  ARIMA(2,0,2) with zero mean     : -1131.93
##  ARIMA(1,0,2) with zero mean     : -1122.107
##  ARIMA(3,0,2) with zero mean     : -1140.436
##  ARIMA(3,0,1) with zero mean     : -1123.978
##  ARIMA(3,0,3) with zero mean     : -1144.505
##  ARIMA(4,0,4) with zero mean     : Inf
##  ARIMA(3,0,3) with non-zero mean : -1143.466
##  ARIMA(2,0,3) with zero mean     : -1142.224
##  ARIMA(4,0,3) with zero mean     : -1131.712
##  ARIMA(3,0,4) with zero mean     : -1143.512
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(3,0,3) with zero mean     : Inf
##  ARIMA(3,0,4) with zero mean     : Inf
##  ARIMA(3,0,3) with non-zero mean : Inf
##  ARIMA(2,0,3) with zero mean     : -1136.246
## 
##  Best model: ARIMA(2,0,3) with zero mean
## Series: bchLogDiff 
## ARIMA(2,0,3) with zero mean 
## 
## Coefficients:
##           ar1     ar2     ma1     ma2      ma3
##       -0.0356  0.7335  1.1905  0.0219  -0.3665
## s.e.   0.0779  0.0574  0.0850  0.1371   0.0990
## 
## sigma^2 estimated as 0.0004849:  log likelihood=574.12
## AIC=-1136.25   AICc=-1135.88   BIC=-1115.39
# Fit the "optimal" ARIMA(p,d,q) model
# Orders (2,0,3) are the ones auto.arima() selected. The intercept is left at
# arima()'s default, although auto.arima() chose the zero-mean variant.
arimaOpt <- arima(bchLogDiff, order = c(2, 0, 3))
arimaOpt
## 
## Call:
## arima(x = bchLogDiff, order = c(2, 0, 3))
## 
## Coefficients:
##           ar1     ar2     ma1     ma2      ma3  intercept
##       -0.0368  0.7324  1.1913  0.0232  -0.3658     0.0023
## s.e.   0.0782  0.0576  0.0852  0.1374   0.0991     0.0084
## 
## sigma^2 estimated as 0.0004746:  log likelihood = 574.16,  aic = -1134.32
plot(arimaOpt)

# Forecast using optimal ARIMA
# Within the training dataset: observed series with fitted values on top
arimaOptFC <- forecast(bchLogDiff, model = arimaOpt, h = 10)
plot(bchLogDiff, type = "l", col = "lightblue",
     lwd = 3, xlab = "", ylab = "")
points(arimaOptFC$fitted, type = "l", col = "red", lwd = 2)

# Beyond the training dataset: 10-step-ahead forecast
plot(arimaOptFC)

# Residual diagnostics up to lag 80. lag.max belongs to tsdisplay(); a
# misplaced parenthesis used to pass it to residuals() instead, so the
# display did not use the requested number of lags.
tsdisplay(residuals(arimaOpt), lag.max = 80)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
# NOTE(review): bch_open_ma is built with ts() without a frequency, so the
# series presumably has frequency 1 and no seasonal period to search over.
# That would explain why the recorded trace matches the non-seasonal run.
plot(bchLogDiff, xlab = "", ylab = "",
     type = "l", lwd = 3, col = "blue")

auto.arima(
  bchLogDiff,                     # the dataset
  seasonal   = TRUE,              # allow seasonal terms
  stationary = TRUE,              # stationary models only
  ic         = "aic",             # criterion used to compare models
  stepwise   = TRUE,              # stepwise selection (faster)
  trace      = TRUE,              # report every model tried
  start.p = 1, max.p = 5,         # AR order: start value and upper bound
  start.q = 1, max.q = 5,         # MA order: start value and upper bound
  start.P = 1, max.P = 5,         # seasonal AR order
  start.Q = 1, max.Q = 5,         # seasonal MA order
  max.d = 2, max.D = 2            # upper bounds on differencing
)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(1,0,1) with non-zero mean : -1122.439
##  ARIMA(0,0,0) with non-zero mean : -836.2027
##  ARIMA(1,0,0) with non-zero mean : -1114.798
##  ARIMA(0,0,1) with non-zero mean : -1003.544
##  ARIMA(0,0,0) with zero mean     : -836.4944
##  ARIMA(2,0,1) with non-zero mean : -1121.726
##  ARIMA(1,0,2) with non-zero mean : -1120.511
##  ARIMA(2,0,2) with non-zero mean : -1130.211
##  ARIMA(2,0,2) with zero mean     : -1131.93
##  ARIMA(1,0,2) with zero mean     : -1122.107
##  ARIMA(3,0,2) with zero mean     : -1140.436
##  ARIMA(3,0,1) with zero mean     : -1123.978
##  ARIMA(3,0,3) with zero mean     : -1144.505
##  ARIMA(4,0,4) with zero mean     : Inf
##  ARIMA(3,0,3) with non-zero mean : -1143.466
##  ARIMA(2,0,3) with zero mean     : -1142.224
##  ARIMA(4,0,3) with zero mean     : -1131.712
##  ARIMA(3,0,4) with zero mean     : -1143.512
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(3,0,3) with zero mean     : Inf
##  ARIMA(3,0,4) with zero mean     : Inf
##  ARIMA(3,0,3) with non-zero mean : Inf
##  ARIMA(2,0,3) with zero mean     : -1136.246
## 
##  Best model: ARIMA(2,0,3) with zero mean
## Series: bchLogDiff 
## ARIMA(2,0,3) with zero mean 
## 
## Coefficients:
##           ar1     ar2     ma1     ma2      ma3
##       -0.0356  0.7335  1.1905  0.0219  -0.3665
## s.e.   0.0779  0.0574  0.0850  0.1371   0.0990
## 
## sigma^2 estimated as 0.0004849:  log likelihood=574.12
## AIC=-1136.25   AICc=-1135.88   BIC=-1115.39
# ==================================================================================
# Bitcoin Cash Close Price
# ==================================================================================

# Close-price series for the Bitcoin Cash model: the 7-day moving average
# with the NA entries dropped, wrapped as a ts object.
bch_close_ma <- ts(na.omit(bch_latest_model[["close_ma"]]))

# Step 1 - ADF test on the raw series
adf.test(bch_close_ma, alternative = "stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  bch_close_ma
## Dickey-Fuller = -1.1749, Lag order = 6, p-value = 0.9087
## alternative hypothesis: stationary
# p-value 0.9087 is above 0.05, so keep transforming
# (the procedure stops once the p-value drops below 0.05).

# Step 2 - log transform, then test again
bchCloseLog <- log(bch_close_ma)
plot(bchCloseLog, xlab = "", ylab = "",
     type = "l", lwd = 3, col = "blue")

adf.test(bchCloseLog)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  bchCloseLog
## Dickey-Fuller = -1.2939, Lag order = 6, p-value = 0.8728
## alternative hypothesis: stationary
# p-value 0.8728 is still above 0.05, so difference the log series.

# Step 3 - first difference of the log series, then test again
bchCloseLogDiff <- diff(bchCloseLog)
plot(bchCloseLogDiff, xlab = "", ylab = "",
     type = "l", lwd = 3, col = "blue")

adf.test(bchCloseLogDiff)
## Warning in adf.test(bchCloseLogDiff): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  bchCloseLogDiff
## Dickey-Fuller = -5.8254, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# p-value is below 0.05: stop here and model bchCloseLogDiff.

# ==========================================================
# ACF and PACF 
# ==========================================================

# Correlograms of the differenced log close series for Bitcoin Cash.
# The plot titles previously said "BTC" (left over from the Bitcoin section).
acf(bchCloseLogDiff, main = "ACF For BCH Differenced Series")

# q is 2

pacf(bchCloseLogDiff, main = "PACF For BCH Differenced Series")

# p is 4

# Arima Based on guessing
arimaFit <- arima(bchCloseLogDiff, order = c(4,0,2))
arimaFit            # check the coefficients
## 
## Call:
## arima(x = bchCloseLogDiff, order = c(4, 0, 2))
## 
## Coefficients:
##          ar1      ar2     ar3      ar4      ma1     ma2  intercept
##       1.6189  -1.7356  1.0112  -0.1158  -0.6483  0.8978     0.0039
## s.e.  0.0870   0.1283  0.1220   0.0738   0.0547  0.0549     0.0079
## 
## sigma^2 estimated as 0.0004863:  log likelihood = 571.16,  aic = -1126.32
# Diagnostic plot of the hand-picked model.
plot(arimaFit)

# Reuse the fitted model (passed via `model =`) on the same series and ask
# for a 10-step forecast.
arimaFitFC <- forecast(bchCloseLogDiff, h = 10, model = arimaFit)

# Differenced series in light blue with the model's fitted values in red.
plot(
  bchCloseLogDiff,
  type = "l", col = "lightblue", lwd = 3,
  xlab = "", ylab = ""
)
points(arimaFitFC$fitted, type = "l", col = "red", lwd = 2)

# Forecast plot for the 10 requested steps.
plot(arimaFitFC)

# Residual display with correlograms out to lag 80.
tsdisplay(residuals(arimaFit), lag.max = 80)

# Search for the "optimal" ARIMA orders. Seasonal terms are switched off and
# the search is limited to stationary models; AIC is the selection criterion.
auto.arima(
  bchCloseLogDiff,              # series to model
  seasonal = FALSE,             # no seasonal component
  stationary = TRUE,            # stationary models only
  ic = "aic",                   # criterion used to rank candidates
  stepwise = TRUE,              # stepwise search (faster)
  trace = TRUE,                 # report every candidate tried
  start.p = 1, start.q = 1,     # starting p, q for the stepwise search
  start.P = 1, start.Q = 1,     # starting P, Q for the stepwise search
  max.p = 5, max.q = 5,         # upper bounds for p, q (non-seasonal)
  max.P = 5, max.Q = 5,         # upper bounds for P, Q (seasonal)
  max.d = 2, max.D = 2          # upper bounds for d, D (differences)
)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(1,0,1) with non-zero mean : -1116.936
##  ARIMA(0,0,0) with non-zero mean : -839.0271
##  ARIMA(1,0,0) with non-zero mean : -1109.056
##  ARIMA(0,0,1) with non-zero mean : -1013.714
##  ARIMA(0,0,0) with zero mean     : -838.9558
##  ARIMA(2,0,1) with non-zero mean : -1118.002
##  ARIMA(2,0,0) with non-zero mean : -1116.745
##  ARIMA(2,0,2) with non-zero mean : -1116.002
##  ARIMA(3,0,2) with non-zero mean : -1144.888
##  ARIMA(3,0,2) with zero mean     : -1146.858
##  ARIMA(2,0,2) with zero mean     : -1117.547
##  ARIMA(4,0,2) with zero mean     : -1148.74
##  ARIMA(4,0,1) with zero mean     : -1118.735
##  ARIMA(4,0,3) with zero mean     : -1142.473
##  ARIMA(3,0,1) with zero mean     : -1119.614
##  ARIMA(5,0,3) with zero mean     : -1131.375
##  ARIMA(4,0,2) with non-zero mean : -1146.933
##  ARIMA(5,0,2) with zero mean     : -1133.322
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(4,0,2) with zero mean     : Inf
##  ARIMA(4,0,2) with non-zero mean : Inf
##  ARIMA(3,0,2) with zero mean     : Inf
##  ARIMA(3,0,2) with non-zero mean : Inf
##  ARIMA(4,0,3) with zero mean     : Inf
##  ARIMA(5,0,2) with zero mean     : Inf
##  ARIMA(5,0,3) with zero mean     : Inf
##  ARIMA(3,0,1) with zero mean     : -1115.554
## 
##  Best model: ARIMA(3,0,1) with zero mean
## Series: bchCloseLogDiff 
## ARIMA(3,0,1) with zero mean 
## 
## Coefficients:
##          ar1      ar2     ar3      ma1
##       1.0186  -0.3171  0.1178  -0.0218
## s.e.  0.5005   0.4922  0.1110   0.5028
## 
## sigma^2 estimated as 0.0005337:  log likelihood=562.78
## AIC=-1115.55   AICc=-1115.3   BIC=-1098.17
# Fit the "optimal" ARIMA(p,d,q) model
# ARMA(1, 1) fit (d = 0) on the differenced log close series.
# NOTE(review): the auto.arima run recorded above reports ARIMA(3,0,1) with
# zero mean as its best model, while this fit uses order (1,0,1) with an
# intercept. Confirm whether the simpler model is intentional.
arimaOpt <- arima(x = bchCloseLogDiff, order = c(1, 0, 1))
arimaOpt
## 
## Call:
## arima(x = bchCloseLogDiff, order = c(1, 0, 1))
## 
## Coefficients:
##          ar1     ma1  intercept
##       0.7377  0.2705     0.0037
## s.e.  0.0522  0.0780     0.0071
## 
## sigma^2 estimated as 0.0005265:  log likelihood = 562.4,  aic = -1116.8
# Diagnostic plot of the chosen model.
plot(arimaOpt)

# Forecast with the chosen ARIMA, reusing the fitted model on the same series
# (10 steps ahead).
arimaOptFC <- forecast(bchCloseLogDiff, h = 10, model = arimaOpt)

# Inside the training data: series in light blue, fitted values in red.
plot(
  bchCloseLogDiff,
  type = "l", col = "lightblue", lwd = 3,
  xlab = "", ylab = ""
)
points(arimaOptFC$fitted, type = "l", col = "red", lwd = 2)

# Past the end of the training data: the forecast itself.
plot(arimaOptFC)

# Residual display with correlograms out to lag 80.
tsdisplay(residuals(arimaOpt), lag.max = 80)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
plot(
  bchCloseLogDiff,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

# Same search as the non-seasonal run, but with seasonal terms allowed.
# NOTE(review): bch_close_ma is created with ts() without a frequency, so the
# series presumably has frequency 1 and no seasonal terms can be tried. The
# recorded trace matches the non-seasonal run line for line. Confirm, and
# consider an explicit frequency (e.g. 7) before concluding "not seasonal".
auto.arima(
  bchCloseLogDiff,              # series to model
  seasonal = TRUE,              # allow seasonal terms
  stationary = TRUE,            # stationary models only
  ic = "aic",                   # criterion used to rank candidates
  stepwise = TRUE,              # stepwise search (faster)
  trace = TRUE,                 # report every candidate tried
  start.p = 1, start.q = 1,     # starting p, q for the stepwise search
  start.P = 1, start.Q = 1,     # starting P, Q for the stepwise search
  max.p = 5, max.q = 5,         # upper bounds for p, q (non-seasonal)
  max.P = 5, max.Q = 5,         # upper bounds for P, Q (seasonal)
  max.d = 2, max.D = 2          # upper bounds for d, D (differences)
)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(1,0,1) with non-zero mean : -1116.936
##  ARIMA(0,0,0) with non-zero mean : -839.0271
##  ARIMA(1,0,0) with non-zero mean : -1109.056
##  ARIMA(0,0,1) with non-zero mean : -1013.714
##  ARIMA(0,0,0) with zero mean     : -838.9558
##  ARIMA(2,0,1) with non-zero mean : -1118.002
##  ARIMA(2,0,0) with non-zero mean : -1116.745
##  ARIMA(2,0,2) with non-zero mean : -1116.002
##  ARIMA(3,0,2) with non-zero mean : -1144.888
##  ARIMA(3,0,2) with zero mean     : -1146.858
##  ARIMA(2,0,2) with zero mean     : -1117.547
##  ARIMA(4,0,2) with zero mean     : -1148.74
##  ARIMA(4,0,1) with zero mean     : -1118.735
##  ARIMA(4,0,3) with zero mean     : -1142.473
##  ARIMA(3,0,1) with zero mean     : -1119.614
##  ARIMA(5,0,3) with zero mean     : -1131.375
##  ARIMA(4,0,2) with non-zero mean : -1146.933
##  ARIMA(5,0,2) with zero mean     : -1133.322
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(4,0,2) with zero mean     : Inf
##  ARIMA(4,0,2) with non-zero mean : Inf
##  ARIMA(3,0,2) with zero mean     : Inf
##  ARIMA(3,0,2) with non-zero mean : Inf
##  ARIMA(4,0,3) with zero mean     : Inf
##  ARIMA(5,0,2) with zero mean     : Inf
##  ARIMA(5,0,3) with zero mean     : Inf
##  ARIMA(3,0,1) with zero mean     : -1115.554
## 
##  Best model: ARIMA(3,0,1) with zero mean
## Series: bchCloseLogDiff 
## ARIMA(3,0,1) with zero mean 
## 
## Coefficients:
##          ar1      ar2     ar3      ma1
##       1.0186  -0.3171  0.1178  -0.0218
## s.e.  0.5005   0.4922  0.1110   0.5028
## 
## sigma^2 estimated as 0.0005337:  log likelihood=562.78
## AIC=-1115.55   AICc=-1115.3   BIC=-1098.17
# Not SEASONAL! :D
# Litecoin (LTC)
# Plot the full open-price history
# Line chart of the LTC open price over the full sorted history.
ggplot() +
  geom_line(
    data = ltc_sorted_final,
    mapping = aes(x = date, y = open, color = "Open Price")
  )

# Summary statistics of the open price.
summary(ltc_sorted_final$open)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.65   41.59   55.03   95.06  160.79  359.13
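# In December 2017, crypto prices surged at a ridiculous rate.

# This affects the whole time series: the prices before that huge spike appear
# to follow a model of their own, and those older, much lower prices are of
# little use now.
# NOTE(review): this comment originally cited a cut-off of "let's say 6000",
# which cannot apply to LTC (the maximum open price above is 359.13). It was
# presumably carried over from another coin's section.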

# Keep the rows dated 2017-04-15 through 2018-04-15 (both ends included).
ltc_latest_model <- ltc_sorted_final[
  ltc_sorted_final$date >= "2017-04-15" &
    ltc_sorted_final$date <= "2018-04-15",
]

# Moving averages of the open price.
ltc_latest_model$open_ma <- ma(ltc_latest_model$open, order = 7) # Weekly Moving Average
ltc_latest_model$open_ma30 <- ma(ltc_latest_model$open, order = 30) # Monthly Moving Average

# Moving averages of the close price (order 7 is the weekly one; its comment
# previously said "Monthly").
ltc_latest_model$close_ma <- ma(ltc_latest_model$close, order = 7) # Weekly Moving Average
ltc_latest_model$close_ma30 <- ma(ltc_latest_model$close, order = 30) # Monthly Moving Average

# Open price with both moving averages. The 30-day layer now has its own
# legend label; it used to share "weekly moving average" with the 7-day
# layer, so the two lines could not be told apart.
ggplot() +
  geom_line(data = ltc_latest_model, aes(x = date, y = open, color = "open Price")) +
  geom_line(data = ltc_latest_model, aes(x = date, y = open_ma, color = "weekly moving average")) +
  geom_line(data = ltc_latest_model, aes(x = date, y = open_ma30, color = "monthly moving average"))
## Warning: Removed 6 rows containing missing values (geom_path).
## Warning: Removed 30 rows containing missing values (geom_path).

# Weekly moving average of the LTC open price with its NA entries dropped,
# wrapped in a ts object (no frequency is supplied).
ltc_open_ma <- ts(
  na.omit(ltc_latest_model[["open_ma"]])
)

# Author's note: the decomposed data looks very seasonal, with a downward
# trend.

# Augmented Dickey-Fuller test on the level series; the alternative
# hypothesis is stationarity.
adf.test(ltc_open_ma, alternative = "stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ltc_open_ma
## Dickey-Fuller = -2.3929, Lag order = 6, p-value = 0.4109
## alternative hypothesis: stationary
# Stop if p-value < 0.05

# Log-transform the open series, plot it, then repeat the ADF test on the
# transformed values.
ltcLog <- log(ltc_open_ma)
plot(
  ltcLog,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(ltcLog)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ltcLog
## Dickey-Fuller = -2.3157, Lag order = 6, p-value = 0.4435
## alternative hypothesis: stationary
# First difference of the log series, plotted and tested for stationarity
# once more.
ltcLogDiff <- diff(ltcLog)
plot(
  ltcLogDiff,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(ltcLogDiff)
## Warning in adf.test(ltcLogDiff): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ltcLogDiff
## Dickey-Fuller = -7.8222, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# ==========================================================
# ACF and PACF 
# ==========================================================

# Correlograms of the differenced log open series for Litecoin.
# The titles previously said "BTC" (copied from the Bitcoin section); they now
# name the coin that is actually plotted.
acf(ltcLogDiff, main = "ACF For LTC Differenced Series")

# q is 2 (read off the ACF plot)

pacf(ltcLogDiff, main = "PACF For LTC Differenced Series")

# p is 3 (read off the PACF plot)

# ARIMA with the hand-picked orders: p = 3, d = 0 (the series is already
# differenced), q = 2.
arimaFit <- arima(ltcLogDiff, order = c(3, 0, 2))
arimaFit            # check the coefficients
## 
## Call:
## arima(x = ltcLogDiff, order = c(3, 0, 2))
## 
## Coefficients:
##          ar1     ar2      ar3     ma1      ma2  intercept
##       0.7476  0.8167  -0.6655  0.1604  -0.6527     0.0080
## s.e.  0.1409  0.0974   0.1078  0.1566   0.1422     0.0046
## 
## sigma^2 estimated as 0.0002809:  log likelihood = 901.69,  aic = -1789.37
# Diagnostic plot of the hand-picked model.
plot(arimaFit)

# Reuse the fitted model (passed via `model =`) on the same series and ask
# for a 10-step forecast.
arimaFitFC <- forecast(ltcLogDiff, h = 10, model = arimaFit)

# Differenced series in light blue with the model's fitted values in red.
plot(
  ltcLogDiff,
  type = "l", col = "lightblue", lwd = 3,
  xlab = "", ylab = ""
)
points(arimaFitFC$fitted, type = "l", col = "red", lwd = 2)

# Forecast plot for the 10 requested steps.
plot(arimaFitFC)

# Residual display with correlograms out to lag 80.
tsdisplay(residuals(arimaFit), lag.max = 80)

# Author's reading: the residuals look fine, but one or two ACF/PACF spikes
# still poke outside the bounds.
# Search for the "optimal" ARIMA orders. Seasonal terms are switched off and
# the search is limited to stationary models; AIC is the selection criterion.
auto.arima(
  ltcLogDiff,                   # series to model
  seasonal = FALSE,             # no seasonal component
  stationary = TRUE,            # stationary models only
  ic = "aic",                   # criterion used to rank candidates
  stepwise = TRUE,              # stepwise search (faster)
  trace = TRUE,                 # report every candidate tried
  start.p = 1, start.q = 1,     # starting p, q for the stepwise search
  start.P = 1, start.Q = 1,     # starting P, Q for the stepwise search
  max.p = 5, max.q = 5,         # upper bounds for p, q (non-seasonal)
  max.P = 5, max.Q = 5,         # upper bounds for P, Q (seasonal)
  max.d = 2, max.D = 2          # upper bounds for d, D (differences)
)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(1,0,1) with non-zero mean : -1791.877
##  ARIMA(0,0,0) with non-zero mean : -1334.011
##  ARIMA(1,0,0) with non-zero mean : -1789.44
##  ARIMA(0,0,1) with non-zero mean : -1583.731
##  ARIMA(0,0,0) with zero mean     : -1316.386
##  ARIMA(2,0,1) with non-zero mean : -1794.232
##  ARIMA(2,0,0) with non-zero mean : -1792.172
##  ARIMA(2,0,2) with non-zero mean : -1793.571
##  ARIMA(3,0,2) with non-zero mean : -1790.972
##  ARIMA(2,0,1) with zero mean     : -1793.99
##  ARIMA(3,0,1) with non-zero mean : -1792.74
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(2,0,1) with non-zero mean : -1791.481
## 
##  Best model: ARIMA(2,0,1) with non-zero mean
## Series: ltcLogDiff 
## ARIMA(2,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1      ar2      ma1    mean
##       1.6832  -0.7314  -0.7600  0.0080
## s.e.  0.1264   0.1051   0.1423  0.0045
## 
## sigma^2 estimated as 0.0002859:  log likelihood=900.74
## AIC=-1791.48   AICc=-1791.3   BIC=-1772.37
# Fit the "optimal" ARIMA(p,d,q) model
# Fit the order reported as best by the auto.arima search recorded above:
# ARIMA(2,0,1) with an intercept.
arimaOpt <- arima(x = ltcLogDiff, order = c(2, 0, 1))
arimaOpt
## 
## Call:
## arima(x = ltcLogDiff, order = c(2, 0, 1))
## 
## Coefficients:
##          ar1      ar2      ma1  intercept
##       1.6832  -0.7314  -0.7600     0.0080
## s.e.  0.1264   0.1051   0.1423     0.0045
## 
## sigma^2 estimated as 0.0002825:  log likelihood = 900.74,  aic = -1791.48
# Diagnostic plot of the chosen model.
plot(arimaOpt)

# Forecast using optimal ARIMA
# Within the training dataset
arimaOptFC <- forecast(ltcLogDiff, model = arimaOpt, h = 10)
plot(ltcLogDiff, type = "l", col = "lightblue",
     lwd = 3, xlab = "", ylab = "")
points(arimaOptFC$fitted, type = "l", col = "red", lwd = 2)

# Beyond the training dataset
plot(arimaOptFC)

# Residual display out to lag 80. The closing parenthesis was misplaced
# before, so lag.max went to residuals() and tsdisplay() fell back to its
# default lag range.
tsdisplay(residuals(arimaOpt), lag.max = 80)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
# Plot the Litecoin series analysed in this section (this used to plot
# bchLogDiff, the Bitcoin Cash series).
plot(ltcLogDiff, type = "l", col = "blue",
     lwd = 3, xlab = "", ylab = "")

# Same search as the non-seasonal run, but with seasonal terms allowed.
# NOTE(review): ltc_open_ma is created with ts() without a frequency, so the
# series presumably has frequency 1 and no seasonal terms can be tried. The
# recorded trace matches the non-seasonal run line for line. Confirm, and
# consider an explicit frequency (e.g. 7) before concluding "not seasonal".
auto.arima(ltcLogDiff,                   # the dataset
           seasonal = TRUE,             # seasonality
           stationary = TRUE,           # stationarity
           max.p = 5, max.q = 5,        # range of p,q (non-seasonal)
           max.P = 5, max.Q = 5,        # range of P,Q (seasonal)
           max.d = 2, max.D = 2,        # range of d,D (differences)
           start.p = 1, start.q = 1,    # start for stepwise search
           start.P = 1, start.Q = 1,    # start for stepwise search
           ic = "aic",                  # criteria to compare
           stepwise = TRUE,             # stepwise selection (faster)
           trace = TRUE)                # all ARIMA models reported
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(1,0,1) with non-zero mean : -1791.877
##  ARIMA(0,0,0) with non-zero mean : -1334.011
##  ARIMA(1,0,0) with non-zero mean : -1789.44
##  ARIMA(0,0,1) with non-zero mean : -1583.731
##  ARIMA(0,0,0) with zero mean     : -1316.386
##  ARIMA(2,0,1) with non-zero mean : -1794.232
##  ARIMA(2,0,0) with non-zero mean : -1792.172
##  ARIMA(2,0,2) with non-zero mean : -1793.571
##  ARIMA(3,0,2) with non-zero mean : -1790.972
##  ARIMA(2,0,1) with zero mean     : -1793.99
##  ARIMA(3,0,1) with non-zero mean : -1792.74
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(2,0,1) with non-zero mean : -1791.481
## 
##  Best model: ARIMA(2,0,1) with non-zero mean
## Series: ltcLogDiff 
## ARIMA(2,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1      ar2      ma1    mean
##       1.6832  -0.7314  -0.7600  0.0080
## s.e.  0.1264   0.1051   0.1423  0.0045
## 
## sigma^2 estimated as 0.0002859:  log likelihood=900.74
## AIC=-1791.48   AICc=-1791.3   BIC=-1772.37
# It is not seasonal! 

# ==================================================================================
# Litecoin Close Price
# ==================================================================================

# Weekly moving average of the LTC close price with its NA entries dropped,
# wrapped in a ts object (no frequency is supplied).
ltc_close_ma <- ts(
  na.omit(ltc_latest_model[["close_ma"]])
)

# Augmented Dickey-Fuller test on the level series; the alternative
# hypothesis is stationarity.
adf.test(ltc_close_ma, alternative = "stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ltc_close_ma
## Dickey-Fuller = -2.4328, Lag order = 6, p-value = 0.3941
## alternative hypothesis: stationary
# Stop if p-value < 0.05

# Log-transform the close series, plot it, then repeat the ADF test on the
# transformed values.
ltcCloseLog <- log(ltc_close_ma)
plot(
  ltcCloseLog,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(ltcCloseLog)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ltcCloseLog
## Dickey-Fuller = -2.3782, Lag order = 6, p-value = 0.4171
## alternative hypothesis: stationary
# Stop if p-value < 0.05

# First difference of the log series, plotted and tested for stationarity
# once more.
ltcCloseLogDiff <- diff(ltcCloseLog)
plot(
  ltcCloseLogDiff,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

adf.test(ltcCloseLogDiff)
## Warning in adf.test(ltcCloseLogDiff): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ltcCloseLogDiff
## Dickey-Fuller = -7.8168, Lag order = 6, p-value = 0.01
## alternative hypothesis: stationary
# Stop if p-value < 0.05

# ==========================================================
# ACF and PACF 
# ==========================================================

# Correlograms of the differenced log close series for Litecoin.
# The titles previously said "BTC" (copied from the Bitcoin section); they now
# name the coin that is actually plotted.
acf(ltcCloseLogDiff, main = "ACF For LTC Differenced Series")

# q is 2 (read off the ACF plot)

pacf(ltcCloseLogDiff, main = "PACF For LTC Differenced Series")

# p is 4 (read off the PACF plot)

# ARIMA with the hand-picked orders: p = 4, d = 0 (the series is already
# differenced), q = 2.
arimaFit <- arima(ltcCloseLogDiff, order = c(4, 0, 2))
arimaFit            # check the coefficients
## 
## Call:
## arima(x = ltcCloseLogDiff, order = c(4, 0, 2))
## 
## Coefficients:
##          ar1      ar2     ar3      ar4      ma1     ma2  intercept
##       2.1010  -2.1249  1.1093  -0.2070  -1.2323  0.9996     0.0081
## s.e.  0.0532   0.1103  0.1103   0.0531   0.0246  0.0387     0.0054
## 
## sigma^2 estimated as 0.0002514:  log likelihood = 917.16,  aic = -1818.33
# Diagnostic plot of the hand-picked model.
plot(arimaFit)

# Reuse the fitted model (passed via `model =`) on the same series and ask
# for a 10-step forecast.
arimaFitFC <- forecast(ltcCloseLogDiff, h = 10, model = arimaFit)

# Differenced series in light blue with the model's fitted values in red.
plot(
  ltcCloseLogDiff,
  type = "l", col = "lightblue", lwd = 3,
  xlab = "", ylab = ""
)
points(arimaFitFC$fitted, type = "l", col = "red", lwd = 2)

# Forecast plot for the 10 requested steps.
plot(arimaFitFC)

# Residual display with correlograms out to lag 80.
tsdisplay(residuals(arimaFit), lag.max = 80)

# Search for the "optimal" ARIMA orders. Seasonal terms are switched off and
# the search is limited to stationary models; AIC is the selection criterion.
auto.arima(
  ltcCloseLogDiff,              # series to model
  seasonal = FALSE,             # no seasonal component
  stationary = TRUE,            # stationary models only
  ic = "aic",                   # criterion used to rank candidates
  stepwise = TRUE,              # stepwise search (faster)
  trace = TRUE,                 # report every candidate tried
  start.p = 1, start.q = 1,     # starting p, q for the stepwise search
  start.P = 1, start.Q = 1,     # starting P, Q for the stepwise search
  max.p = 5, max.q = 5,         # upper bounds for p, q (non-seasonal)
  max.P = 5, max.Q = 5,         # upper bounds for P, Q (seasonal)
  max.d = 2, max.D = 2          # upper bounds for d, D (differences)
)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(1,0,1) with non-zero mean : -1787.8
##  ARIMA(0,0,0) with non-zero mean : -1333.854
##  ARIMA(1,0,0) with non-zero mean : -1786.529
##  ARIMA(0,0,1) with non-zero mean : -1580.798
##  ARIMA(0,0,0) with zero mean     : -1316.402
##  ARIMA(2,0,1) with non-zero mean : -1789.651
##  ARIMA(2,0,0) with non-zero mean : -1787.509
##  ARIMA(2,0,2) with non-zero mean : -1788.878
##  ARIMA(3,0,2) with non-zero mean : -1785.584
##  ARIMA(2,0,1) with zero mean     : -1789.305
##  ARIMA(3,0,1) with non-zero mean : -1786.759
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(2,0,1) with non-zero mean : -1787.459
## 
##  Best model: ARIMA(2,0,1) with non-zero mean
## Series: ltcCloseLogDiff 
## ARIMA(2,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1      ar2      ma1    mean
##       1.6828  -0.7299  -0.7645  0.0082
## s.e.  0.1271   0.1057   0.1421  0.0046
## 
## sigma^2 estimated as 0.0002893:  log likelihood=898.73
## AIC=-1787.46   AICc=-1787.28   BIC=-1768.34
# Fit the "optimal" ARIMA(p,d,q) model
# ARMA(1, 1) fit (d = 0) on the differenced log close series.
# NOTE(review): the auto.arima run recorded above reports ARIMA(2,0,1) with
# non-zero mean as its best model, while this fit uses order (1,0,1).
# Confirm whether the simpler model is intentional.
arimaOpt <- arima(x = ltcCloseLogDiff, order = c(1, 0, 1))
arimaOpt
## 
## Call:
## arima(x = ltcCloseLogDiff, order = c(1, 0, 1))
## 
## Coefficients:
##          ar1     ma1  intercept
##       0.8282  0.1164     0.0084
## s.e.  0.0350  0.0634     0.0059
## 
## sigma^2 estimated as 0.000288:  log likelihood = 897.53,  aic = -1787.06
# Diagnostic plot of the chosen model.
plot(arimaOpt)

# Forecast with the chosen ARIMA, reusing the fitted model on the same series
# (10 steps ahead).
arimaOptFC <- forecast(ltcCloseLogDiff, h = 10, model = arimaOpt)

# Inside the training data: series in light blue, fitted values in red.
plot(
  ltcCloseLogDiff,
  type = "l", col = "lightblue", lwd = 3,
  xlab = "", ylab = ""
)
points(arimaOptFC$fitted, type = "l", col = "red", lwd = 2)

# Past the end of the training data: the forecast itself.
plot(arimaOptFC)

# Residual display with correlograms out to lag 80.
tsdisplay(residuals(arimaOpt), lag.max = 80)

# =======================================================
# ARIMA(p,d,q)(P,D,Q)[s] modeling of Seasonal Time Series
plot(
  ltcCloseLogDiff,
  type = "l", col = "blue", lwd = 3,
  xlab = "", ylab = ""
)

# Same search as the non-seasonal run, but with seasonal terms allowed.
# NOTE(review): ltc_close_ma is created with ts() without a frequency, so the
# series presumably has frequency 1 and no seasonal terms can be tried. The
# recorded trace matches the non-seasonal run line for line. Confirm, and
# consider an explicit frequency (e.g. 7) before concluding "not seasonal".
auto.arima(
  ltcCloseLogDiff,              # series to model
  seasonal = TRUE,              # allow seasonal terms
  stationary = TRUE,            # stationary models only
  ic = "aic",                   # criterion used to rank candidates
  stepwise = TRUE,              # stepwise search (faster)
  trace = TRUE,                 # report every candidate tried
  start.p = 1, start.q = 1,     # starting p, q for the stepwise search
  start.P = 1, start.Q = 1,     # starting P, Q for the stepwise search
  max.p = 5, max.q = 5,         # upper bounds for p, q (non-seasonal)
  max.P = 5, max.Q = 5,         # upper bounds for P, Q (seasonal)
  max.d = 2, max.D = 2          # upper bounds for d, D (differences)
)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(1,0,1) with non-zero mean : -1787.8
##  ARIMA(0,0,0) with non-zero mean : -1333.854
##  ARIMA(1,0,0) with non-zero mean : -1786.529
##  ARIMA(0,0,1) with non-zero mean : -1580.798
##  ARIMA(0,0,0) with zero mean     : -1316.402
##  ARIMA(2,0,1) with non-zero mean : -1789.651
##  ARIMA(2,0,0) with non-zero mean : -1787.509
##  ARIMA(2,0,2) with non-zero mean : -1788.878
##  ARIMA(3,0,2) with non-zero mean : -1785.584
##  ARIMA(2,0,1) with zero mean     : -1789.305
##  ARIMA(3,0,1) with non-zero mean : -1786.759
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(2,0,1) with non-zero mean : -1787.459
## 
##  Best model: ARIMA(2,0,1) with non-zero mean
## Series: ltcCloseLogDiff 
## ARIMA(2,0,1) with non-zero mean 
## 
## Coefficients:
##          ar1      ar2      ma1    mean
##       1.6828  -0.7299  -0.7645  0.0082
## s.e.  0.1271   0.1057   0.1421  0.0046
## 
## sigma^2 estimated as 0.0002893:  log likelihood=898.73
## AIC=-1787.46   AICc=-1787.28   BIC=-1768.34